Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions R/pkg/tests/fulltests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -4098,14 +4098,13 @@ test_that("catalog APIs, listTables, getTable, listColumns, listFunctions, funct
c("name", "description", "dataType", "nullable", "isPartition", "isBucket"))
expect_equal(collect(c)[[1]][[1]], "speed")
expect_error(listColumns("zxwtyswklpf", "default"),
paste("Error in listColumns : analysis error - Table",
"'zxwtyswklpf' does not exist in database 'default'"))
paste("Table or view not found: spark_catalog.default.zxwtyswklpf"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This actually is a user behavior change as it returns a different error message now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we treat error message change as behavior change. We change error messages from time to time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SG


f <- listFunctions()
expect_true(nrow(f) >= 200) # 250
expect_equal(colnames(f),
c("name", "catalog", "namespace", "description", "className", "isTemporary"))
expect_equal(take(orderBy(f, "className"), 1)$className,
expect_equal(take(orderBy(filter(f, "className IS NOT NULL"), "className"), 1)$className,
"org.apache.spark.sql.catalyst.expressions.Abs")
expect_error(listFunctions("zxwtyswklpf_db"),
paste("Error in listFunctions : no such database - Database",
Expand Down
6 changes: 3 additions & 3 deletions python/pyspark/sql/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def getDatabase(self, dbName: str) -> Database:
Examples
--------
>>> spark.catalog.getDatabase("default")
Database(name='default', catalog=None, description='default database', ...
Database(name='default', catalog='spark_catalog', description='default database', ...
>>> spark.catalog.getDatabase("spark_catalog.default")
Database(name='default', catalog='spark_catalog', description='default database', ...
"""
Expand Down Expand Up @@ -376,9 +376,9 @@ def getFunction(self, functionName: str) -> Function:
--------
>>> func = spark.sql("CREATE FUNCTION my_func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'")
>>> spark.catalog.getFunction("my_func1")
Function(name='my_func1', catalog=None, namespace=['default'], ...
Function(name='my_func1', catalog='spark_catalog', namespace=['default'], ...
>>> spark.catalog.getFunction("default.my_func1")
Function(name='my_func1', catalog=None, namespace=['default'], ...
Function(name='my_func1', catalog='spark_catalog', namespace=['default'], ...
>>> spark.catalog.getFunction("spark_catalog.default.my_func1")
Function(name='my_func1', catalog='spark_catalog', namespace=['default'], ...
>>> spark.catalog.getFunction("my_func2")
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def test_list_functions(self):
self.assertTrue("to_unix_timestamp" in functions)
self.assertTrue("current_database" in functions)
self.assertEqual(functions["+"].name, "+")
self.assertEqual(functions["+"].description, None)
self.assertEqual(functions["+"].description, "expr1 + expr2 - Returns `expr1`+`expr2`.")
self.assertEqual(
functions["+"].className, "org.apache.spark.sql.catalyst.expressions.Add"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -971,13 +971,17 @@ class SessionCatalog(
}

def lookupTempView(name: TableIdentifier): Option[View] = {
val tableName = formatTableName(name.table)
if (name.database.isEmpty) {
tempViews.get(tableName).map(getTempViewPlan)
} else if (formatDatabaseName(name.database.get) == globalTempViewManager.database) {
globalTempViewManager.get(tableName).map(getTempViewPlan)
} else {
None
lookupLocalOrGlobalRawTempView(name.database.toSeq :+ name.table).map(getTempViewPlan)
}

/**
* Return the raw logical plan of a temporary local or global view for the given name.
*/
def lookupLocalOrGlobalRawTempView(name: Seq[String]): Option[TemporaryViewRelation] = {
name match {
case Seq(v) => getRawTempView(v)
case Seq(db, v) if isGlobalTempViewDB(db) => getRawGlobalTempView(v)
case _ => None
}
}

Expand Down
102 changes: 54 additions & 48 deletions sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,36 +33,37 @@ import org.apache.spark.storage.StorageLevel
abstract class Catalog {

/**
* Returns the current default database in this session.
* Returns the current database (namespace) in this session.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is a different way to refer to here: schema.

Are we have decided to use namespace in Spark?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

namespace is more like the official name. database/schema is only for the hive catalog. We can change database to database/schema though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good. Just wanted to confirm that we don't miss anything obvious.

*
* @since 2.0.0
*/
def currentDatabase: String

/**
* Sets the current default database in this session.
* Sets the current database (namespace) in this session.
*
* @since 2.0.0
*/
def setCurrentDatabase(dbName: String): Unit

/**
* Returns a list of databases available across all sessions.
* Returns a list of databases (namespaces) available within the current catalog.
*
* @since 2.0.0
*/
def listDatabases(): Dataset[Database]

/**
* Returns a list of tables/views in the current database.
* Returns a list of tables/views in the current database (namespace).
* This includes all temporary views.
*
* @since 2.0.0
*/
def listTables(): Dataset[Table]

/**
* Returns a list of tables/views in the specified database.
* Returns a list of tables/views in the specified database (namespace) (the name can be qualified
* with catalog).
* This includes all temporary views.
*
* @since 2.0.0
Expand All @@ -71,16 +72,17 @@ abstract class Catalog {
def listTables(dbName: String): Dataset[Table]

/**
* Returns a list of functions registered in the current database.
* This includes all temporary functions
* Returns a list of functions registered in the current database (namespace).
* This includes all temporary functions.
*
* @since 2.0.0
*/
def listFunctions(): Dataset[Function]

/**
* Returns a list of functions registered in the specified database.
* This includes all temporary functions
* Returns a list of functions registered in the specified database (namespace) (the name can be
* qualified with catalog).
* This includes all built-in and temporary functions.
*
* @since 2.0.0
*/
Expand All @@ -90,30 +92,31 @@ abstract class Catalog {
/**
* Returns a list of columns for the given table/view or temporary view.
*
* @param tableName is either a qualified or unqualified name that designates a table/view.
* If no database identifier is provided, it refers to a temporary view or
* a table/view in the current database.
* @param tableName is either a qualified or unqualified name that designates a table/view. It
* follows the same resolution rule with SQL: search for temp views first then
* table/views in the current database (namespace).
* @since 2.0.0
*/
@throws[AnalysisException]("table does not exist")
def listColumns(tableName: String): Dataset[Column]

/**
* Returns a list of columns for the given table/view in the specified database.
* Returns a list of columns for the given table/view in the specified database under the Hive
* Metastore.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 this looks nice to explicitly says HMS

*
* This API does not support 3 layer namespace since 3.4.0. To use 3 layer namespace,
* use listColumns(tableName) instead.
* To list columns for table/view in other catalogs, please use `listColumns(tableName)` with
* qualified table/view name instead.
*
* @param dbName is a name that designates a database.
* @param dbName is an unqualified name that designates a database.
* @param tableName is an unqualified name that designates a table/view.
* @since 2.0.0
*/
@throws[AnalysisException]("database or table does not exist")
def listColumns(dbName: String, tableName: String): Dataset[Column]

/**
* Get the database with the specified name. This throws an AnalysisException when the database
* cannot be found.
* Get the database (namespace) with the specified name (can be qualified with catalog). This
* throws an AnalysisException when the database (namespace) cannot be found.
*
* @since 2.1.0
*/
Expand All @@ -124,20 +127,20 @@ abstract class Catalog {
* Get the table or view with the specified name. This table can be a temporary view or a
* table/view. This throws an AnalysisException when no Table can be found.
*
* @param tableName is either a qualified or unqualified name that designates a table/view.
* If no database identifier is provided, it refers to a table/view in
* the current database.
* @param tableName is either a qualified or unqualified name that designates a table/view. It
* follows the same resolution rule with SQL: search for temp views first then
* table/views in the current database (namespace).
* @since 2.1.0
*/
@throws[AnalysisException]("table does not exist")
def getTable(tableName: String): Table

/**
* Get the table or view with the specified name in the specified database. This throws an
* AnalysisException when no Table can be found.
* Get the table or view with the specified name in the specified database under the Hive
* Metastore. This throws an AnalysisException when no Table can be found.
*
* This API does not support 3 layer namespace since 3.4.0. To use 3 layer namespace,
* use getTable(tableName) instead.
* To get table/view in other catalogs, please use `getTable(tableName)` with qualified table/view
* name instead.
*
* @since 2.1.0
*/
Expand All @@ -148,30 +151,31 @@ abstract class Catalog {
* Get the function with the specified name. This function can be a temporary function or a
* function. This throws an AnalysisException when the function cannot be found.
*
* @param functionName is either a qualified or unqualified name that designates a function.
* If no database identifier is provided, it refers to a temporary function
* or a function in the current database.
* @param functionName is either a qualified or unqualified name that designates a function. It
* follows the same resolution rule with SQL: search for built-in/temp
* functions first then functions in the current database (namespace).
* @since 2.1.0
*/
@throws[AnalysisException]("function does not exist")
def getFunction(functionName: String): Function

/**
* Get the function with the specified name. This throws an AnalysisException when the function
* cannot be found.
* Get the function with the specified name in the specified database under the Hive Metastore.
* This throws an AnalysisException when the function cannot be found.
*
* This API does not support 3 layer namespace since 3.4.0. To use 3 layer namespace,
* use getFunction(functionName) instead.
* To get functions in other catalogs, please use `getFunction(functionName)` with qualified
* function name instead.
*
* @param dbName is a name that designates a database.
* @param dbName is an unqualified name that designates a database.
* @param functionName is an unqualified name that designates a function in the specified database
* @since 2.1.0
*/
@throws[AnalysisException]("database or function does not exist")
def getFunction(dbName: String, functionName: String): Function

/**
* Check if the database with the specified name exists.
* Check if the database (namespace) with the specified name exists (the name can be qualified
* with catalog).
*
* @since 2.1.0
*/
Expand All @@ -181,20 +185,21 @@ abstract class Catalog {
* Check if the table or view with the specified name exists. This can either be a temporary
* view or a table/view.
*
* @param tableName is either a qualified or unqualified name that designates a table/view.
* If no database identifier is provided, it refers to a table/view in
* the current database.
* @param tableName is either a qualified or unqualified name that designates a table/view. It
* follows the same resolution rule with SQL: search for temp views first then
* table/views in the current database (namespace).
* @since 2.1.0
*/
def tableExists(tableName: String): Boolean

/**
* Check if the table or view with the specified name exists in the specified database.
* Check if the table or view with the specified name exists in the specified database under the
* Hive Metastore.
*
* This API does not support 3 layer namespace since 3.4.0. To use 3 layer namespace,
* use tableExists(tableName) instead.
* To check existence of table/view in other catalogs, please use `tableExists(tableName)` with
* qualified table/view name instead.
*
* @param dbName is a name that designates a database.
* @param dbName is an unqualified name that designates a database.
* @param tableName is an unqualified name that designates a table.
* @since 2.1.0
*/
Expand All @@ -204,20 +209,21 @@ abstract class Catalog {
* Check if the function with the specified name exists. This can either be a temporary function
* or a function.
*
* @param functionName is either a qualified or unqualified name that designates a function.
* If no database identifier is provided, it refers to a function in
* the current database.
* @param functionName is either a qualified or unqualified name that designates a function. It
* follows the same resolution rule with SQL: search for built-in/temp
* functions first then functions in the current database (namespace).
* @since 2.1.0
*/
def functionExists(functionName: String): Boolean

/**
* Check if the function with the specified name exists in the specified database.
* Check if the function with the specified name exists in the specified database under the
* Hive Metastore.
*
* This API does not support 3 layer namespace since 3.4.0. To use 3 layer namespace,
* use functionExists(functionName) instead.
* To check existence of functions in other catalogs, please use `functionExists(functionName)`
* with qualified function name instead.
*
* @param dbName is a name that designates a database.
* @param dbName is an unqualified name that designates a database.
* @param functionName is an unqualified name that designates a function.
* @since 2.1.0
*/
Expand Down
Loading