@@ -22,8 +22,11 @@ package org.apache.hudi
import org.apache.hudi.avro.model.HoodieClusteringGroup
import org.apache.hudi.client.SparkRDDWriteClient
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
+import org.apache.spark.SparkException
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.withSparkConf

import scala.collection.JavaConverters.{collectionAsScalaIterableConverter, mapAsJavaMapConverter}
@@ -57,4 +60,16 @@ object HoodieCLIUtils {

partitionPaths.sorted.mkString(",")
}

def getHoodieCatalogTable(sparkSession: SparkSession, table: String): HoodieCatalogTable = {
val seq: Seq[String] = table.split('.')
seq match {
case Seq(tableName) =>
HoodieCatalogTable(sparkSession, TableIdentifier(tableName))
case Seq(database, tableName) =>
HoodieCatalogTable(sparkSession, TableIdentifier(tableName, Some(database)))
case _ =>
throw new SparkException(s"Unsupported identifier $table")
}
}
}
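
For context, a minimal usage sketch of the new helper (not part of the change itself); the session setup and table names are illustrative assumptions:

```scala
import org.apache.hudi.HoodieCLIUtils
import org.apache.spark.sql.SparkSession

object GetHoodieCatalogTableSketch {
  def main(args: Array[String]): Unit = {
    // Assumes a SparkSession that can see the catalog holding the Hudi tables.
    val spark = SparkSession.builder()
      .appName("hoodie-cli-utils-sketch")
      .enableHiveSupport()
      .getOrCreate()

    // An unqualified name resolves against the session's current database.
    val plain = HoodieCLIUtils.getHoodieCatalogTable(spark, "hudi_trips")
    println(plain.tableLocation)

    // "db.table" is split on '.' and resolved with an explicit database.
    val qualified = HoodieCLIUtils.getHoodieCatalogTable(spark, "warehouse.hudi_trips")
    println(qualified.tableLocation)

    // Identifiers with more than two parts are rejected with a SparkException:
    // HoodieCLIUtils.getHoodieCatalogTable(spark, "catalog.db.table")
  }
}
```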
@@ -17,12 +17,14 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.client.SparkRDDWriteClient
import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.model.HoodieRecordPayload
import org.apache.hudi.config.{HoodieIndexConfig, HoodieWriteConfig}
import org.apache.hudi.exception.HoodieClusteringException
import org.apache.hudi.index.HoodieIndex.IndexType
+import org.apache.spark.SparkException
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
@@ -112,7 +114,7 @@ abstract class BaseProcedure extends Procedure {

protected def getBasePath(tableName: Option[Any], tablePath: Option[Any] = Option.empty): String = {
tableName.map(
-t => HoodieCatalogTable(sparkSession, new TableIdentifier(t.asInstanceOf[String])).tableLocation)
+t => HoodieCLIUtils.getHoodieCatalogTable(sparkSession, t.asInstanceOf[String]).tableLocation)
.getOrElse(
tablePath.map(p => p.asInstanceOf[String]).getOrElse(
throw new HoodieClusteringException("Table name or table path must be given one"))
@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.HoodieTimeline
import org.apache.spark.sql.Row
@@ -47,7 +48,7 @@ class CommitsCompareProcedure() extends BaseProcedure with ProcedureBuilder {
val table = getArgValueOrDefault(args, PARAMETERS(0)).get.asInstanceOf[String]
val path = getArgValueOrDefault(args, PARAMETERS(1)).get.asInstanceOf[String]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val source = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build
val target = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(path).build
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hudi.command.procedures
import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificData
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.avro.HoodieAvroUtils
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry
import org.apache.hudi.common.fs.FSUtils
@@ -72,7 +73,7 @@ class ExportInstantsProcedure extends BaseProcedure with ProcedureBuilder with L
val actions: String = getArgValueOrDefault(args, PARAMETERS(3)).get.asInstanceOf[String]
val desc = getArgValueOrDefault(args, PARAMETERS(4)).get.asInstanceOf[Boolean]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build
val archivePath = new Path(basePath + "/.hoodie/.commits_.archive*")
@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.HoodieTimeline
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion
@@ -49,7 +50,7 @@ class RollbackToInstantTimeProcedure extends BaseProcedure with ProcedureBuilder
val table = getArgValueOrDefault(args, PARAMETERS(0)).get.asInstanceOf[String]
val instantTime = getArgValueOrDefault(args, PARAMETERS(1)).get.asInstanceOf[String]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val client = createHoodieClient(jsc, basePath)
client.getConfig.setValue(ROLLBACK_USING_MARKERS_ENABLE, "false")
@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.model.HoodieCommitMetadata
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieDefaultTimeline, HoodieInstant}
@@ -82,7 +83,7 @@ class ShowArchivedCommitsProcedure(includeExtraMetadata: Boolean) extends BasePr
var startTs = getArgValueOrDefault(args, PARAMETERS(2)).get.asInstanceOf[String]
var endTs = getArgValueOrDefault(args, PARAMETERS(3)).get.asInstanceOf[String]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build

@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieReplaceCommitMetadata, HoodieWriteStat}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
@@ -61,7 +62,7 @@ class ShowCommitFilesProcedure() extends BaseProcedure with ProcedureBuilder {
val limit = getArgValueOrDefault(args, PARAMETERS(1)).get.asInstanceOf[Int]
val instantTime = getArgValueOrDefault(args, PARAMETERS(2)).get.asInstanceOf[String]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build
val activeTimeline = metaClient.getActiveTimeline
@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieReplaceCommitMetadata, HoodieWriteStat}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
@@ -60,7 +61,7 @@ class ShowCommitPartitionsProcedure() extends BaseProcedure with ProcedureBuilde
val limit = getArgValueOrDefault(args, PARAMETERS(1)).get.asInstanceOf[Int]
val instantTime = getArgValueOrDefault(args, PARAMETERS(2)).get.asInstanceOf[String]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build
val activeTimeline = metaClient.getActiveTimeline
@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieReplaceCommitMetadata}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
@@ -55,7 +56,7 @@ class ShowCommitWriteStatsProcedure() extends BaseProcedure with ProcedureBuilde
val limit = getArgValueOrDefault(args, PARAMETERS(1)).get.asInstanceOf[Int]
val instantTime = getArgValueOrDefault(args, PARAMETERS(2)).get.asInstanceOf[String]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build
val activeTimeline = metaClient.getActiveTimeline
@@ -17,6 +17,7 @@

package org.apache.spark.sql.hudi.command.procedures

+import org.apache.hudi.HoodieCLIUtils
import org.apache.hudi.common.model.HoodieCommitMetadata
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.{HoodieDefaultTimeline, HoodieInstant}
@@ -78,7 +79,7 @@ class ShowCommitsProcedure(includeExtraMetadata: Boolean) extends BaseProcedure
val table = getArgValueOrDefault(args, PARAMETERS(0)).get.asInstanceOf[String]
val limit = getArgValueOrDefault(args, PARAMETERS(1)).get.asInstanceOf[Int]

-val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table))
+val hoodieCatalogTable = HoodieCLIUtils.getHoodieCatalogTable(sparkSession, table)
val basePath = hoodieCatalogTable.tableLocation
val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build

@@ -18,7 +18,6 @@
package org.apache.spark.sql.hudi

import org.apache.hudi.{HoodieSparkUtils, SparkAdapterSupport}
-import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.{FileSourceScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan}
import org.apache.spark.sql.internal.SQLConf
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.hudi.procedure

import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase

class HoodieSparkProcedureTestBase extends HoodieSparkSqlTestBase {
override def generateTableName: String = {
s"default.${super.generateTableName}"
}
}
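
A sketch of what a procedure test looks like on top of this base class: generateTableName now yields a database-qualified name (for example "default.h0"), so every call-procedure test exercises the db.table code path above. The class name, schema, properties, and assertion below are illustrative and mirror the existing tests:

```scala
package org.apache.spark.sql.hudi.procedure

class TestShowCommitsWithQualifiedName extends HoodieSparkProcedureTestBase {
  test("Test Call show_commits Procedure with a qualified table name") {
    withTempDir { tmp =>
      val tableName = generateTableName  // e.g. "default.h0"
      spark.sql(
        s"""
           |create table $tableName (
           |  id int,
           |  name string,
           |  price double,
           |  ts long
           |) using hudi
           | location '${tmp.getCanonicalPath}'
           | tblproperties (
           |  primaryKey = 'id',
           |  preCombineField = 'ts'
           | )
           |""".stripMargin)
      spark.sql(s"insert into $tableName values (1, 'a1', 10.0, 1000)")

      // One commit was written, so the procedure should return at least one row.
      assert(spark.sql(s"call show_commits(table => '$tableName', limit => 10)").count() >= 1)
    }
  }
}
```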
@@ -21,13 +21,12 @@ import org.apache.hadoop.fs.Path
import org.apache.hudi.common.model.HoodieTableType
import org.apache.hudi.functional.TestBootstrap
import org.apache.spark.api.java.JavaSparkContext
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
import org.apache.spark.sql.{Dataset, Row}

import java.time.Instant
import java.util

-class TestBootstrapProcedure extends HoodieSparkSqlTestBase {
+class TestBootstrapProcedure extends HoodieSparkProcedureTestBase {

test("Test Call run_bootstrap Procedure") {
withTempDir { tmp =>
@@ -19,9 +19,8 @@ package org.apache.spark.sql.hudi.procedure

import org.apache.hudi.common.model.IOType
import org.apache.hudi.common.testutils.FileCreateUtils
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase

-class TestCallProcedure extends HoodieSparkSqlTestBase {
+class TestCallProcedure extends HoodieSparkProcedureTestBase {

test("Test Call show_commits Procedure") {
withTempDir { tmp =>
@@ -19,9 +19,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestCleanProcedure extends HoodieSparkSqlTestBase {
+class TestCleanProcedure extends HoodieSparkProcedureTestBase {

test("Test Call run_clean Procedure by Table") {
withTempDir { tmp =>
@@ -23,11 +23,10 @@ import org.apache.hadoop.fs.Path
import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstant, HoodieTimeline}
import org.apache.hudi.common.util.{Option => HOption}
import org.apache.hudi.{HoodieCLIUtils, HoodieDataSourceHelpers}
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase

import scala.collection.JavaConverters.asScalaIteratorConverter

-class TestClusteringProcedure extends HoodieSparkSqlTestBase {
+class TestClusteringProcedure extends HoodieSparkProcedureTestBase {

test("Test Call run_clustering Procedure By Table") {
withTempDir { tmp =>
@@ -17,9 +17,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestCommitsProcedure extends HoodieSparkSqlTestBase {
+class TestCommitsProcedure extends HoodieSparkProcedureTestBase {

test("Test Call show_archived_commits Procedure") {
withTempDir { tmp =>
@@ -20,9 +20,8 @@
package org.apache.spark.sql.hudi.procedure

import org.apache.hudi.common.table.timeline.HoodieInstant
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase

-class TestCompactionProcedure extends HoodieSparkSqlTestBase {
+class TestCompactionProcedure extends HoodieSparkProcedureTestBase {

test("Test Call run_compaction Procedure by Table") {
withTempDir { tmp =>
@@ -18,11 +18,10 @@
package org.apache.spark.sql.hudi.procedure

import org.apache.spark.sql.Row
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase

import java.util

-class TestCopyToTableProcedure extends HoodieSparkSqlTestBase {
+class TestCopyToTableProcedure extends HoodieSparkProcedureTestBase {

test("Test Call copy_to_table Procedure with default params") {
withTempDir { tmp =>
@@ -17,9 +17,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestExportInstantsProcedure extends HoodieSparkSqlTestBase {
+class TestExportInstantsProcedure extends HoodieSparkProcedureTestBase {

test("Test Call export_instants Procedure") {
withTempDir { tmp =>
@@ -17,9 +17,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestFsViewProcedure extends HoodieSparkSqlTestBase {
+class TestFsViewProcedure extends HoodieSparkProcedureTestBase {
test("Test Call show_fsview_all Procedure") {
withTempDir { tmp =>
val tableName = generateTableName
@@ -29,15 +29,14 @@ import org.apache.parquet.hadoop.ParquetWriter
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.parser.ParseException
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
import org.junit.jupiter.api.Assertions.assertTrue

import java.io.IOException
import java.util
import java.util.Objects
import java.util.concurrent.TimeUnit

-class TestHdfsParquetImportProcedure extends HoodieSparkSqlTestBase {
+class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase {

test("Test Call hdfs_parquet_import Procedure with insert operation") {
withTempDir { tmp =>
@@ -19,9 +19,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestHoodieLogFileProcedure extends HoodieSparkSqlTestBase {
+class TestHoodieLogFileProcedure extends HoodieSparkProcedureTestBase {
test("Test Call show_logfile_metadata Procedure") {
withTempDir { tmp =>
val tableName = generateTableName
@@ -17,9 +17,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestMetadataProcedure extends HoodieSparkSqlTestBase {
+class TestMetadataProcedure extends HoodieSparkProcedureTestBase {

test("Test Call delete_metadata_table Procedure") {
withTempDir { tmp =>
@@ -29,14 +29,13 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView
import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, SchemaTestUtil}
import org.apache.hudi.testutils.HoodieSparkWriteableTestTable
import org.apache.spark.api.java.JavaSparkContext
-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase

import java.io.IOException
import java.net.URL
import java.nio.file.{Files, Paths}
import scala.collection.JavaConverters.asScalaIteratorConverter

-class TestRepairsProcedure extends HoodieSparkSqlTestBase {
+class TestRepairsProcedure extends HoodieSparkProcedureTestBase {

test("Test Call repair_add_partition_meta Procedure") {
withTempDir { tmp =>
@@ -17,9 +17,7 @@

package org.apache.spark.sql.hudi.procedure

-import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase
-
-class TestSavepointsProcedure extends HoodieSparkSqlTestBase {
+class TestSavepointsProcedure extends HoodieSparkProcedureTestBase {

test("Test Call create_savepoint Procedure") {
withTempDir { tmp =>