-
Notifications
You must be signed in to change notification settings - Fork 310
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[add][plugin][paimonwriter] Add support for writing files in Paimon f…
…ormat
- Loading branch information
1 parent
1c2ab7d
commit bda5e2d
Showing
10 changed files
with
741 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Paimon Writer | ||
|
||
Paimon Writer 提供向已有的 Paimon 表写入数据的能力。 | ||
|
||
## 配置样例 | ||
|
||
```json | ||
--8<-- "jobs/paimonwriter.json" | ||
``` | ||
|
||
## 参数说明 | ||
|
||
| 配置项 | 是否必须 | 数据类型 | 默认值 | 说明 | | ||
|:-------------|:----:|--------|----|------------------------------------------------| | ||
| dbName | 是 | string | 无 | 要写入的paimon数据库名 | | ||
| tableName | 是 | string | 无 | 要写入的paimon表名 | | ||
| writeMode | 是 | string | 无 | 写入模式,详述见下 | | ||
| paimonConfig | 是 | json | {} | 可以配置与 Paimon catalog 和 Hadoop 相关的一些高级参数,比如 HA 的配置,详述见下 | | ||
|
||
|
||
|
||
### writeMode | ||
|
||
写入前数据清理处理模式: | ||
|
||
- append,写入前不做任何处理,直接写入,不清除原来的数据。 | ||
- truncate,写入前先清空表,再写入。 | ||
|
||
### paimonConfig | ||
|
||
`paimonConfig` 里可以配置与 Paimon catalog 和 Hadoop 相关的一些高级参数,比如 HA 的配置。下面依次给出使用本地文件系统和使用启用 Kerberos 的 HDFS HA 集群的两种配置示例: | ||
```json | ||
{ | ||
"name": "paimonwriter", | ||
"parameter": { | ||
"dbName": "test", | ||
"tableName": "test2", | ||
"writeMode": "truncate", | ||
"paimonConfig": { | ||
"warehouse": "file:///g:/paimon", | ||
"metastore": "filesystem" | ||
} | ||
} | ||
} | ||
``` | ||
```json | ||
{ | ||
"paimonConfig": { | ||
"warehouse": "hdfs://nameservice1/user/hive/paimon", | ||
"metastore": "filesystem", | ||
"fs.defaultFS":"hdfs://nameservice1", | ||
"hadoop.security.authentication" : "kerberos", | ||
"hadoop.kerberos.principal" : "hive/[email protected]", | ||
"hadoop.kerberos.keytab" : "/tmp/[email protected]", | ||
"ha.zookeeper.quorum" : "test-pr-nn1:2181,test-pr-nn2:2181,test-pr-nn3:2181", | ||
"dfs.nameservices" : "nameservice1", | ||
"dfs.namenode.rpc-address.nameservice1.namenode371" : "test-pr-nn2:8020", | ||
"dfs.namenode.rpc-address.nameservice1.namenode265": "test-pr-nn1:8020", | ||
"dfs.namenode.keytab.file" : "/tmp/[email protected]", | ||
"dfs.namenode.keytab.enabled" : "true", | ||
"dfs.namenode.kerberos.principal" : "hdfs/[email protected]", | ||
"dfs.namenode.kerberos.internal.spnego.principal" : "HTTP/[email protected]", | ||
"dfs.ha.namenodes.nameservice1" : "namenode265,namenode371", | ||
"dfs.datanode.keytab.file" : "/tmp/[email protected]", | ||
"dfs.datanode.keytab.enabled" : "true", | ||
"dfs.datanode.kerberos.principal" : "hdfs/[email protected]", | ||
"dfs.client.use.datanode.hostname" : "false", | ||
"dfs.client.failover.proxy.provider.nameservice1" : "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider", | ||
"dfs.balancer.keytab.file" : "/tmp/[email protected]", | ||
"dfs.balancer.keytab.enabled" : "true", | ||
"dfs.balancer.kerberos.principal" : "hdfs/[email protected]" | ||
} | ||
} | ||
``` | ||
|
||
|
||
## 类型转换 | ||
|
||
| Addax 内部类型 | Paimon 数据类型 | | ||
|------------|------------------------------| | ||
| Integer | TINYINT,SMALLINT,INT,INTEGER | | ||
| Long | BIGINT | | ||
| Double | FLOAT,DOUBLE,DECIMAL | | ||
| String | STRING,VARCHAR,CHAR | | ||
| Boolean | BOOLEAN | | ||
| Date | DATE,TIMESTAMP | | ||
| Bytes | BINARY | | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
<assembly | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" | ||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-component-1.1.2.xsd"> | ||
<id>release</id> | ||
<formats> | ||
<format>dir</format> | ||
</formats> | ||
<includeBaseDirectory>false</includeBaseDirectory> | ||
<fileSets> | ||
<fileSet> | ||
<directory>src/main/resources</directory> | ||
<includes> | ||
<include>*.json</include> | ||
</includes> | ||
<outputDirectory>plugin/writer/${project.artifactId}</outputDirectory> | ||
</fileSet> | ||
<fileSet> | ||
<directory>target/</directory> | ||
<includes> | ||
<include>${project.artifactId}-${project.version}.jar</include> | ||
</includes> | ||
<outputDirectory>plugin/writer/${project.artifactId}</outputDirectory> | ||
</fileSet> | ||
</fileSets> | ||
|
||
<dependencySets> | ||
<dependencySet> | ||
<useProjectArtifact>false</useProjectArtifact> | ||
<outputDirectory>plugin/writer/${project.artifactId}/libs</outputDirectory> | ||
<scope>runtime</scope> | ||
<excludes> | ||
<exclude>com.wgzhao.addax:*</exclude> | ||
</excludes> | ||
</dependencySet> | ||
</dependencySets> | ||
</assembly> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
|
||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<parent> | ||
<groupId>com.wgzhao.addax</groupId> | ||
<artifactId>addax-all</artifactId> | ||
<version>4.2.3-SNAPSHOT</version> | ||
<relativePath>../../../pom.xml</relativePath> | ||
</parent> | ||
|
||
<artifactId>paimonwriter</artifactId> | ||
<name>paimon-writer</name> | ||
<description>PaimonWriter提供了本地写入paimon格式文件功能,建议开发、测试环境使用。</description> | ||
<packaging>jar</packaging> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>com.wgzhao.addax</groupId> | ||
<artifactId>addax-common</artifactId> | ||
<version>${project.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<artifactId>slf4j-log4j12</artifactId> | ||
<groupId>org.slf4j</groupId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>com.wgzhao.addax</groupId> | ||
<artifactId>addax-storage</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.apache.paimon</groupId> | ||
<artifactId>paimon-bundle</artifactId> | ||
<version>1.0.0</version> | ||
</dependency> | ||
|
||
|
||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-common</artifactId> | ||
<version>${hadoop.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-databind</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.codehaus.jackson</groupId> | ||
<artifactId>jackson-core-asl</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.codehaus.jackson</groupId> | ||
<artifactId>jackson-mapper-asl</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.fasterxml.woodstox</groupId> | ||
<artifactId>woodstox-core</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-codec</groupId> | ||
<artifactId>commons-codec</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-net</groupId> | ||
<artifactId>commons-net</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>io.netty</groupId> | ||
<artifactId>netty</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>log4j</groupId> | ||
<artifactId>log4j</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>net.minidev</groupId> | ||
<artifactId>json-smart</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.codehaus.jettison</groupId> | ||
<artifactId>jettison</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-server</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.xerial.snappy</groupId> | ||
<artifactId>snappy-java</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.zookeeper</groupId> | ||
<artifactId>zookeeper</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-util</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-aws</artifactId> | ||
<version>${hadoop.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-databind</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.codehaus.jackson</groupId> | ||
<artifactId>jackson-core-asl</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.codehaus.jackson</groupId> | ||
<artifactId>jackson-mapper-asl</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.fasterxml.woodstox</groupId> | ||
<artifactId>woodstox-core</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-codec</groupId> | ||
<artifactId>commons-codec</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-net</groupId> | ||
<artifactId>commons-net</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>io.netty</groupId> | ||
<artifactId>netty</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>log4j</groupId> | ||
<artifactId>log4j</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>net.minidev</groupId> | ||
<artifactId>json-smart</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.codehaus.jettison</groupId> | ||
<artifactId>jettison</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-server</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.xerial.snappy</groupId> | ||
<artifactId>snappy-java</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.zookeeper</groupId> | ||
<artifactId>zookeeper</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-util</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-mapreduce-client-core</artifactId> | ||
<version>${hadoop.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-databind</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-codec</groupId> | ||
<artifactId>commons-codec</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>io.netty</groupId> | ||
<artifactId>netty</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-util</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
|
||
|
||
<dependency> | ||
<groupId>com.fasterxml.woodstox</groupId> | ||
<artifactId>woodstox-core</artifactId> | ||
<version>${woodstox.version}</version> | ||
</dependency> | ||
</dependencies> | ||
|
||
<build> | ||
<plugins> | ||
<plugin> | ||
<artifactId>maven-assembly-plugin</artifactId> | ||
<configuration> | ||
<descriptors> | ||
<descriptor>package.xml</descriptor> | ||
</descriptors> | ||
<finalName>${project.artifactId}-${project.version}</finalName> | ||
</configuration> | ||
<executions> | ||
<execution> | ||
<id>release</id> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>single</goal> | ||
</goals> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
</project> |
Oops, something went wrong.