From 206f25c2c1e8e28c6dfdfbed89d28273c325ceac Mon Sep 17 00:00:00 2001 From: acezen Date: Wed, 8 Nov 2023 16:12:21 +0800 Subject: [PATCH 1/2] Complement the error messages of spark --- .../scala/com/alibaba/graphar/EdgeInfo.scala | 62 ++++++++++++++----- .../scala/com/alibaba/graphar/GraphInfo.scala | 12 ++-- .../com/alibaba/graphar/VertexInfo.scala | 10 +-- .../alibaba/graphar/datasources/GarScan.scala | 6 +- .../graphar/datasources/GarScanBuilder.scala | 6 +- .../graphar/datasources/GarTable.scala | 6 +- .../graphar/graph/GraphTransformer.scala | 12 +++- .../alibaba/graphar/graph/GraphWriter.scala | 10 ++- .../alibaba/graphar/reader/EdgeReader.scala | 23 +++++-- .../alibaba/graphar/reader/VertexReader.scala | 8 ++- .../alibaba/graphar/writer/EdgeWriter.scala | 14 ++++- .../alibaba/graphar/writer/VertexWriter.scala | 8 ++- 12 files changed, 128 insertions(+), 49 deletions(-) diff --git a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala b/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala index 950033d4d..52f6dc5ba 100644 --- a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala +++ b/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala @@ -75,7 +75,11 @@ class EdgeInfo() { return str } } - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } /** @@ -95,7 +99,11 @@ class EdgeInfo() { return adj_list.getFile_type_in_gar } } - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } /** @@ -117,7 +125,11 @@ class EdgeInfo() { return adj_list.getProperty_groups } } - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } /** @@ -216,7 +228,9 @@ class EdgeInfo() { } } } - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type or property group not found." + ) } /** @@ -245,7 +259,7 @@ class EdgeInfo() { } } } - throw new IllegalArgumentException + throw new IllegalArgumentException("property not found: " + property_name) } /** @@ -275,7 +289,7 @@ class EdgeInfo() { } } } - throw new IllegalArgumentException + throw new IllegalArgumentException("property not found: " + property_name) } /** Get Primary key of edge info. */ @@ -337,7 +351,11 @@ class EdgeInfo() { */ def getVerticesNumFilePath(adj_list_type: AdjListType.Value): String = { if (containAdjList(adj_list_type) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } val str: String = prefix + getAdjListPrefix(adj_list_type) + "vertex_count" return str @@ -354,7 +372,11 @@ class EdgeInfo() { */ def getEdgesNumPathPrefix(adj_list_type: AdjListType.Value): String = { if (containAdjList(adj_list_type) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } val str: String = prefix + getAdjListPrefix(adj_list_type) + "edge_count" return str @@ -376,7 +398,11 @@ class EdgeInfo() { adj_list_type: AdjListType.Value ): String = { if (containAdjList(adj_list_type) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } val str: String = prefix + getAdjListPrefix(adj_list_type) + "edge_count" + chunk_index.toString() @@ -400,7 +426,11 @@ class EdgeInfo() { adj_list_type: AdjListType.Value ): String = { if (containAdjList(adj_list_type) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } val str: String = prefix + getAdjListPrefix(adj_list_type) + "offset/chunk" + @@ -419,7 +449,11 @@ class EdgeInfo() { */ def getOffsetPathPrefix(adj_list_type: AdjListType.Value): String = { if (containAdjList(adj_list_type) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type not found: " + AdjListType.AdjListTypeToString( + adj_list_type + ) + ) } return prefix + getAdjListPrefix(adj_list_type) + "offset/" } @@ -503,7 +537,7 @@ class EdgeInfo() { chunk_index: Long ): String = { if (containPropertyGroup(property_group, adj_list_type) == false) - throw new IllegalArgumentException + throw new IllegalArgumentException("property group not found.") var str: String = property_group.getPrefix if (str == "") { val properties = property_group.getProperties @@ -540,7 +574,7 @@ class EdgeInfo() { vertex_chunk_index: Long ): String = { if (containPropertyGroup(property_group, adj_list_type) == false) - throw new IllegalArgumentException + throw new IllegalArgumentException("property group not found.") var str: String = property_group.getPrefix if (str == "") { val properties = property_group.getProperties @@ -573,7 +607,7 @@ class EdgeInfo() { adj_list_type: AdjListType.Value ): String = { if (containPropertyGroup(property_group, adj_list_type) == false) - throw new IllegalArgumentException + throw new IllegalArgumentException("property group not found.") var str: String = property_group.getPrefix if (str == "") { val properties = property_group.getProperties diff --git a/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala b/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala index 29ddc9040..a05a236f2 100644 --- a/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala +++ b/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala @@ -63,7 +63,7 @@ object GarType extends Enumeration { case GarType.DOUBLE => "double" case GarType.STRING => "string" case GarType.ARRAY => "array" - case _ => throw new IllegalArgumentException + case _ => throw new IllegalArgumentException("Unknown data type") } /** @@ -82,7 +82,7 @@ object GarType extends Enumeration { case "double" => GarType.DOUBLE case "string" => GarType.STRING case "array" => GarType.ARRAY - case _ => throw new IllegalArgumentException + case _ => throw new IllegalArgumentException("Unknown data type: " + str) } } @@ -105,7 +105,7 @@ object FileType extends Enumeration { case FileType.CSV => "csv" case FileType.PARQUET => "parquet" case FileType.ORC => "orc" - case _ => throw new IllegalArgumentException + case _ => throw new IllegalArgumentException("Unknown file type") } /** @@ -120,7 +120,7 @@ object FileType extends Enumeration { case "csv" => FileType.CSV case "parquet" => FileType.PARQUET case "orc" => FileType.ORC - case _ => throw new IllegalArgumentException + case _ => throw new IllegalArgumentException("Unknown file type: " + str) } } @@ -155,7 +155,7 @@ object AdjListType extends Enumeration { case AdjListType.unordered_by_dest => "unordered_by_dest" case AdjListType.ordered_by_source => "ordered_by_source" case AdjListType.ordered_by_dest => "ordered_by_dest" - case _ => throw new IllegalArgumentException + case _ => throw new IllegalArgumentException("Unknown adjList type") } /** String to adjList type in gar */ @@ -164,7 +164,7 @@ object AdjListType extends Enumeration { case "unordered_by_dest" => AdjListType.unordered_by_dest case "ordered_by_source" => AdjListType.ordered_by_source case "ordered_by_dest" => AdjListType.ordered_by_dest - case _ => throw new IllegalArgumentException + case _ => throw new IllegalArgumentException("Unknown adjList type: " + str) } } diff --git a/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala b/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala index a57a6a59f..7cedeb135 100644 --- a/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala +++ b/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala @@ -93,7 +93,7 @@ class VertexInfo() { } } } - throw new IllegalArgumentException + throw new IllegalArgumentException("Property not found: " + property_name) } /** @@ -117,7 +117,7 @@ class VertexInfo() { } } } - throw new IllegalArgumentException + throw new IllegalArgumentException("Property not found: " + property_name) } /** @@ -141,7 +141,7 @@ class VertexInfo() { } } } - throw new IllegalArgumentException + throw new IllegalArgumentException("Property not found: " + property_name) } /** @@ -205,7 +205,7 @@ class VertexInfo() { */ def getFilePath(property_group: PropertyGroup, chunk_index: Long): String = { if (containPropertyGroup(property_group) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException("Property group not found.") } var str: String = "" if (property_group.getPrefix == "") { @@ -234,7 +234,7 @@ class VertexInfo() { */ def getPathPrefix(property_group: PropertyGroup): String = { if (containPropertyGroup(property_group) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException("Property group not found.") } var str: String = "" if (property_group.getPrefix == "") { diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala b/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala index 7d538c75a..54348a0e7 100644 --- a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala +++ b/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala @@ -72,7 +72,8 @@ case class GarScan( case "csv" => createCSVReaderFactory() case "orc" => createOrcReaderFactory() case "parquet" => createParquetReaderFactory() - case _ => throw new IllegalArgumentException + case _ => + throw new IllegalArgumentException("Invalid format name: " + formatName) } // Create the reader factory for the CSV format. @@ -269,7 +270,8 @@ case class GarScan( case "csv" => super.hashCode() case "orc" => getClass.hashCode() case "parquet" => getClass.hashCode() - case _ => throw new IllegalArgumentException + case _ => + throw new IllegalArgumentException("Invalid format name: " + formatName) } /** Get the description string of the object. */ diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala b/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala index 1a77997d6..40c87f77a 100644 --- a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala +++ b/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala @@ -55,7 +55,8 @@ case class GarScanBuilder( case "csv" => Array.empty[Filter] case "orc" => pushedOrcFilters case "parquet" => pushedParquetFilters - case _ => throw new IllegalArgumentException + case _ => + throw new IllegalArgumentException("Invalid format name: " + formatName) } private lazy val pushedParquetFilters: Array[Filter] = { @@ -87,7 +88,8 @@ case class GarScanBuilder( case "orc" => sparkSession.sessionState.conf.nestedSchemaPruningEnabled case "parquet" => sparkSession.sessionState.conf.nestedSchemaPruningEnabled - case _ => throw new IllegalArgumentException + case _ => + throw new IllegalArgumentException("Invalid format name: " + formatName) } /** Build the file scan for GarDataSource. */ diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala b/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala index 4f174bdc1..da83999c9 100644 --- a/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala +++ b/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala @@ -81,7 +81,8 @@ case class GarTable( OrcUtils.inferSchema(sparkSession, files, options.asScala.toMap) case "parquet" => ParquetUtils.inferSchema(sparkSession, options.asScala.toMap, files) - case _ => throw new IllegalArgumentException + case _ => + throw new IllegalArgumentException("Invalid format name: " + formatName) } /** Construct a new write builder according to the actual file format. */ @@ -93,7 +94,8 @@ case class GarTable( new OrcWriteBuilder(paths, formatName, supportsDataType, info) case "parquet" => new ParquetWriteBuilder(paths, formatName, supportsDataType, info) - case _ => throw new IllegalArgumentException + case _ => + throw new IllegalArgumentException("Invalid format name: " + formatName) } /** diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala b/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala index 9c6792231..f7d814922 100644 --- a/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala +++ b/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala @@ -59,7 +59,9 @@ object GraphTransformer { // load source vertex info val label = dest_vertex_info.getLabel() if (!sourceVertexInfosMap.contains(label)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "vertex info of " + label + " not found in graph info." + ) } val source_vertex_info = sourceVertexInfosMap(label) // read vertex chunks from the source graph @@ -105,7 +107,9 @@ object GraphTransformer { // load source edge info val key = dest_edge_info.getConcatKey() if (!sourceEdgeInfosMap.contains(key)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "edge info of " + key + " not found in graph info." + ) } val source_edge_info = sourceEdgeInfosMap(key) var has_loaded = false @@ -146,7 +150,9 @@ object GraphTransformer { dest_edge_info.getDst_label } if (!sourceVertexInfosMap.contains(vertex_label)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "vertex info of " + vertex_label + " not found in graph info." + ) } val vertex_info = sourceVertexInfosMap(vertex_label) val reader = new VertexReader(source_prefix, vertex_info, spark) diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala b/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala index 39490c581..ef422c9dd 100644 --- a/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala +++ b/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala @@ -47,7 +47,9 @@ class GraphWriter() { primaryKey: String = "" ): Unit = { if (vertices.exists(_._1 == label)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Vertex data of label " + label + " has been put." + ) } vertices += label -> df vertexNums += label -> df.count @@ -63,7 +65,9 @@ class GraphWriter() { */ def PutEdgeData(relation: (String, String, String), df: DataFrame): Unit = { if (edges.exists(_._1 == relation)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Edge data of relation " + relation + " has been put." + ) } edges += relation -> df } @@ -195,7 +199,7 @@ class GraphWriter() { case (key, df) => { edge_schemas += key -> new StructType( df.schema.drop(2).toArray - ) // drop the src, dst fileds + ) // drop the src, dst fields } } val graph_info = Utils.generateGraphInfo( diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala b/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala index a9876bec9..2db0e0d4d 100644 --- a/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala +++ b/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala @@ -47,7 +47,12 @@ class EdgeReader( spark: SparkSession ) { if (edgeInfo.containAdjList(adjListType) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Edge info does not contain adj list type: " + AdjListType + .AdjListTypeToString( + adjListType + ) + ) } /** Load the total number of src/dst vertices for this edge type. */ @@ -111,7 +116,9 @@ class EdgeReader( if ( adjListType != AdjListType.ordered_by_source && adjListType != AdjListType.ordered_by_dest ) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Adj list type must be ordered_by_source or ordered_by_dest." + ) } val file_type_in_gar = edgeInfo.getAdjListFileType(adjListType) val file_type = FileType.FileTypeToString(file_type_in_gar) @@ -228,7 +235,9 @@ class EdgeReader( chunk_index: Long ): DataFrame = { if (edgeInfo.containPropertyGroup(propertyGroup, adjListType) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Edge info does not contain property group or adj list type." + ) } val file_type = propertyGroup.getFile_type() val file_path = prefix + edgeInfo.getPropertyFilePath( @@ -265,7 +274,9 @@ class EdgeReader( addIndex: Boolean = true ): DataFrame = { if (edgeInfo.containPropertyGroup(propertyGroup, adjListType) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Edge info does not contain property group or adj list type." + ) } val file_type = propertyGroup.getFile_type() val file_path = prefix + edgeInfo.getPropertyGroupPathPrefix( @@ -301,7 +312,9 @@ class EdgeReader( addIndex: Boolean = true ): DataFrame = { if (edgeInfo.containPropertyGroup(propertyGroup, adjListType) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "Edge info does not contain property group or adj list type." + ) } val file_type = propertyGroup.getFile_type() val file_path = diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala b/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala index 6c77e54ab..5df22355b 100644 --- a/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala +++ b/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala @@ -65,7 +65,9 @@ class VertexReader( chunk_index: Long ): DataFrame = { if (!vertexInfo.containPropertyGroup(propertyGroup)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "property group not contained in vertex info." + ) } val file_type = propertyGroup.getFile_type() val file_path = prefix + vertexInfo.getFilePath(propertyGroup, chunk_index) @@ -93,7 +95,9 @@ class VertexReader( addIndex: Boolean = true ): DataFrame = { if (!vertexInfo.containPropertyGroup(propertyGroup)) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "property group not contained in vertex info." + ) } val file_type = propertyGroup.getFile_type() val file_path = prefix + vertexInfo.getPathPrefix(propertyGroup) diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala b/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala index ac4a11f4d..032107d2f 100644 --- a/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala +++ b/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala @@ -220,7 +220,11 @@ class EdgeWriter( private def validate(): Unit = { // chunk if edge info contains the adj list type if (edgeInfo.containAdjList(adjListType) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "adj list type: " + AdjListType.AdjListTypeToString( + adjListType + ) + " not found in edge info." + ) } // check the src index and dst index column exist val src_filed = StructField(GeneralParams.srcIndexCol, LongType) @@ -229,7 +233,9 @@ class EdgeWriter( if ( schema.contains(src_filed) == false || schema.contains(dst_filed) == false ) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "edge DataFrame must contain src index column and dst index column." + ) } } @@ -319,7 +325,9 @@ class EdgeWriter( */ def writeEdgeProperties(propertyGroup: PropertyGroup): Unit = { if (edgeInfo.containPropertyGroup(propertyGroup, adjListType) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "property group not contained in edge info." + ) } val propertyList = ArrayBuffer[String]() diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala b/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala index 81cb7b11a..8b764a528 100644 --- a/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala +++ b/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala @@ -81,7 +81,9 @@ class VertexWriter( // check if vertex DataFrame contains the index_filed val index_filed = StructField(GeneralParams.vertexIndexCol, LongType) if (vertexDf.schema.contains(index_filed) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "vertex DataFrame must contain index column." + ) } } @@ -103,7 +105,9 @@ class VertexWriter( def writeVertexProperties(propertyGroup: PropertyGroup): Unit = { // check if contains the property group if (vertexInfo.containPropertyGroup(propertyGroup) == false) { - throw new IllegalArgumentException + throw new IllegalArgumentException( + "property group not contained in vertex info." + ) } // write out the chunks From e6b52dbb30069c4241ed2c45b7c7b47b395c9d49 Mon Sep 17 00:00:00 2001 From: acezen Date: Wed, 8 Nov 2023 16:20:57 +0800 Subject: [PATCH 2/2] Fix --- spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala b/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala index 52f6dc5ba..cd41c5816 100644 --- a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala +++ b/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala @@ -225,6 +225,12 @@ class EdgeInfo() { return pg } } + throw new IllegalArgumentException( + "property group not found: " + property_name + " in adj list type: " + AdjListType + .AdjListTypeToString( + adj_list_type + ) + ) } } }