Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions azure-pipelines-20230430.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ parameters:
- 'hudi-spark-datasource'
- 'hudi-spark-datasource/hudi-spark'
- 'hudi-spark-datasource/hudi-spark3.5.x'
- 'hudi-spark-datasource/hudi-spark3-common'
- 'hudi-spark-datasource/hudi-spark-common'
- name: job10UTModules
type: object
Expand All @@ -52,7 +51,6 @@ parameters:
- '!hudi-spark-datasource'
- '!hudi-spark-datasource/hudi-spark'
- '!hudi-spark-datasource/hudi-spark3.5.x'
- '!hudi-spark-datasource/hudi-spark3-common'
- '!hudi-spark-datasource/hudi-spark-common'
- '!hudi-utilities'
- name: job10FTModules
Expand Down
2 changes: 0 additions & 2 deletions hudi-spark-datasource/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@ This repo contains the code that integrate Hudi with Spark. The repo is split in
`hudi-spark3.3.x`
`hudi-spark3.4.x`
`hudi-spark3.5.x`
`hudi-spark3-common`
`hudi-spark-common`

* hudi-spark is the module that contains the code that all spark3 versions would share.
* hudi-spark3.3.x is the module that contains the code that is compatible with spark 3.3.x versions.
* hudi-spark3.4.x is the module that contains the code that is compatible with spark 3.4.x versions.
* hudi-spark3.5.x is the module that contains the code that is compatible with spark 3.5.x versions.
* hudi-spark3-common is the module that contains the code that would be reused between spark3.x versions.
* hudi-spark-common is the module that contains the code that would be reused among spark3.x versions.

## Description of Time Travel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ protected Option<HoodieData<WriteStatus>> doExecute(Dataset<Row> records, boolea
String targetFormat;
Map<String, String> customOpts = new HashMap<>(1);
if (HoodieSparkUtils.isSpark3()) {
targetFormat = "org.apache.hudi.spark3.internal";
targetFormat = "org.apache.hudi.spark.internal";
customOpts.put(HoodieInternalConfig.BULKINSERT_INPUT_DATA_SCHEMA_DDL.key(), records.schema().json());
} else {
throw new HoodieException("Bulk insert using row writer is not supported with current Spark version."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.table.HoodieTableConfig;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.index.HoodieIndex;
Expand All @@ -33,7 +33,7 @@
import java.io.IOException;

/**
* Hoodie's Implementation of {@link DataWriter<InternalRow>}. This is used in data source "hudi.spark3.internal" implementation for bulk insert.
* Hoodie's Implementation of {@link DataWriter<InternalRow>}. This is used in data source "hudi.spark.internal" implementation for bulk insert.
*/
public class HoodieBulkInsertDataInternalWriter implements DataWriter<InternalRow> {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.client.WriteStatus;
Expand All @@ -39,7 +39,7 @@
import java.util.stream.Collectors;

/**
* Implementation of {@link BatchWrite} for datasource "hudi.spark3.internal" to be used in datasource implementation
* Implementation of {@link BatchWrite} for datasource "hudi.spark.internal" to be used in datasource implementation
* of bulk insert.
*/
public class HoodieDataSourceInternalBatchWrite implements BatchWrite {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.storage.StorageConfiguration;
Expand All @@ -29,7 +29,7 @@
import java.util.Map;

/**
* Implementation of {@link WriteBuilder} for datasource "hudi.spark3.internal" to be used in datasource implementation
* Implementation of {@link WriteBuilder} for datasource "hudi.spark.internal" to be used in datasource implementation
* of bulk insert.
*/
public class HoodieDataSourceInternalBatchWriteBuilder implements WriteBuilder {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.storage.StorageConfiguration;
Expand All @@ -33,7 +33,7 @@
import java.util.Set;

/**
* Hoodie's Implementation of {@link SupportsWrite}. This is used in data source "hudi.spark3.internal" implementation for bulk insert.
* Hoodie's Implementation of {@link SupportsWrite}. This is used in data source "hudi.spark.internal" implementation for bulk insert.
*/
class HoodieDataSourceInternalTable implements SupportsWrite {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.internal.BaseWriterCommitMessage;
Expand All @@ -26,7 +26,7 @@
import java.util.List;

/**
* Hoodie's {@link WriterCommitMessage} used in datasource "hudi.spark3.internal" implementation.
* Hoodie's {@link WriterCommitMessage} used in datasource "hudi.spark.internal" implementation.
*/
public class HoodieWriterCommitMessage extends BaseWriterCommitMessage
implements WriterCommitMessage {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.hudi.spark3.internal;
package org.apache.hudi.spark.internal;

import org.apache.spark.sql.catalyst.util.DateFormatter;

Expand Down
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: should these classes be renamed to *Spark* from *Spark3*?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about it too, and even tried to do so, but found that some classes already have such "brothers" (classes with the same names but without Spark version). So i decided to not rename anything. But i'll revisit this one more time in Spark 4 support PR.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import org.apache.hudi.{AvroConversionUtils, DefaultSource, Spark3RowSerDe}
import org.apache.hudi.client.utils.SparkRowSerDe
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.util.JsonUtils
import org.apache.hudi.spark3.internal.ReflectUtil
import org.apache.hudi.spark.internal.ReflectUtil
import org.apache.hudi.storage.StoragePath

import org.apache.avro.Schema
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.plans.logcal
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.hudi.DataSourceReadOptions

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.plans.logcal
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.plans.logcal
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.hudi.common.util.ValidationUtils.checkState

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.plans.logcal
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Finally, someone submitted a fix for this. Thank you!

package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.plans.logcal
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.hudi.DataSourceReadOptions

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql.execution.datasources

import org.apache.hudi.common.util.PartitionPathEncodeUtils.DEFAULT_PARTITION_PATH
import org.apache.hudi.spark3.internal.ReflectUtil
import org.apache.hudi.spark.internal.ReflectUtil

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.InternalRow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, NamedRe
import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.resolveExpressionByPlanChildren
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logcal._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.Origin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql.hudi.analysis

import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
import org.apache.spark.sql.catalyst.plans.logcal.{HoodieFileSystemViewTableValuedFunction, HoodieMetadataTableValuedFunction, HoodieQuery, HoodieTableChanges, HoodieTimelineTableValuedFunction}
import org.apache.spark.sql.catalyst.plans.logical.{HoodieFileSystemViewTableValuedFunction, HoodieMetadataTableValuedFunction, HoodieQuery, HoodieTableChanges, HoodieTimelineTableValuedFunction}

object TableValuedFunctions {

Expand Down
Loading
Loading