-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-1780 - JDBC Adapter to convert Relational Data objects to Arrow Data Format Vector Objects #1759
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-1780 - JDBC Adapter to convert Relational Data objects to Arrow Data Format Vector Objects #1759
Changes from 75 commits
1b2d27f
c482b50
e462f80
89d02bf
c9d2727
b003abb
134e339
333bc01
32fa968
1e5c584
127b4fb
8e2b7f5
772ae96
a556695
e3c490d
df444ad
15f281f
a32f788
1b4175f
4c706f5
9ead27d
73b0198
c97e910
9459221
5c1f5f2
f76ac48
a4d2b32
5fb446c
7f70a67
c2ac474
f834c77
e1c1078
378f660
ba8156d
fdb9d71
30a5341
ff4c79b
646eba6
8c21902
f5e95d2
1dd9079
5c56cf7
eff855a
230ba4a
f955614
6911c40
f624bce
3bb513a
8e25761
a007f31
6641ad4
bffbe13
29b40cd
f2c4ae0
d0e55f9
f5f6efe
8a218c4
1441d3b
3b99621
5a59278
a6cc2fe
76c1efb
7f8f613
1d0f29b
8b52507
654325b
63f7735
68c3a61
69d5202
1400fc8
fe1f27d
3b4ebc5
c5b3f56
00d2f11
6556a44
8b23396
e55cb48
fbd84fb
2c56d59
b8dce96
85a2354
e1ed84d
f757164
540f69b
66f19e5
4830800
675a570
2b6b720
32bfb1f
bafbd1a
09b526e
7320cdc
0707a0c
af43943
f3b923c
6578d17
9a6aa9a
501aacc
d1b0992
b95e878
da1227d
857a4b4
6b0b00e
7cc649c
73d1097
054d1eb
75d5d1f
c2ba1ee
e2ba906
acda6e3
f06d85e
4bfbfe6
4329c4b
840970c
2069103
95f8e23
a68cc23
60314c6
32ba284
aa6eb92
3a2b312
02952c4
eac219a
2129ff9
9990616
52c0047
16cda22
f685b93
dcaa74a
239fca6
13ec352
12780ed
0c78755
67593cd
d260342
6254260
c9b22fe
fa65a31
9a2f463
7125d6e
b124ece
654a5e4
e189b14
c66f4a2
abaea4e
16d8ec1
25eadcf
dd1ffa4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor | ||
| license agreements. See the NOTICE file distributed with this work for additional | ||
| information regarding copyright ownership. The ASF licenses this file to | ||
| You under the Apache License, Version 2.0 (the "License"); you may not use | ||
| this file except in compliance with the License. You may obtain a copy of | ||
| the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required | ||
| by applicable law or agreed to in writing, software distributed under the | ||
| License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS | ||
| OF ANY KIND, either express or implied. See the License for the specific | ||
| language governing permissions and limitations under the License. --> | ||
|
|
||
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
| <modelVersion>4.0.0</modelVersion> | ||
| <!-- Inherits build configuration from the Arrow Java root POM. --> | ||
| <parent> | ||
| <groupId>org.apache.arrow</groupId> | ||
| <artifactId>arrow-java-root</artifactId> | ||
| <version>0.10.0-SNAPSHOT</version> | ||
| </parent> | ||
|
|
||
| <artifactId>arrow-jdbc</artifactId> | ||
| <name>Arrow JDBC Adapter</name> | ||
| <url>http://maven.apache.org</url> | ||
| <dependencies> | ||
|
|
||
| <!-- Sibling Arrow modules are pinned to this module's own version. --> | ||
| <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory --> | ||
| <dependency> | ||
| <groupId>org.apache.arrow</groupId> | ||
| <artifactId>arrow-memory</artifactId> | ||
| <version>${project.version}</version> | ||
| </dependency> | ||
| <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-vector --> | ||
| <dependency> | ||
| <groupId>org.apache.arrow</groupId> | ||
| <artifactId>arrow-vector</artifactId> | ||
| <version>${project.version}</version> | ||
| </dependency> | ||
| <!-- Guava version comes from the shared dep.guava.version property rather than a literal, | ||
| so this module cannot drift from the rest of the build. | ||
| NOTE(review): assumes the property is defined in arrow-java-root - confirm. --> | ||
| <dependency> | ||
| <groupId>com.google.guava</groupId> | ||
| <artifactId>guava</artifactId> | ||
| <version>${dep.guava.version}</version> | ||
| </dependency> | ||
|
|
||
|
|
||
| <!-- Test --> | ||
| <dependency> | ||
| <groupId>junit</groupId> | ||
| <artifactId>junit</artifactId> | ||
| <version>4.11</version> | ||
|
||
| <scope>test</scope> | ||
| </dependency> | ||
| <!-- https://mvnrepository.com/artifact/com.h2database/h2 --> | ||
| <dependency> | ||
| <groupId>com.h2database</groupId> | ||
| <artifactId>h2</artifactId> | ||
| <version>1.4.196</version> | ||
| <scope>test</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.fasterxml.jackson.dataformat</groupId> | ||
| <artifactId>jackson-dataformat-yaml</artifactId> | ||
| <version>2.7.9</version> | ||
|
||
| <scope>test</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.fasterxml.jackson.core</groupId> | ||
| <artifactId>jackson-databind</artifactId> | ||
| <version>2.7.9</version> | ||
|
||
| <scope>test</scope> | ||
| </dependency> | ||
| <!-- NOTE(review): google-collections is the predecessor of Guava, which is already a dependency above; | ||
| check whether this test dependency is still needed. --> | ||
| <!-- https://mvnrepository.com/artifact/com.google.collections/google-collections --> | ||
| <dependency> | ||
| <groupId>com.google.collections</groupId> | ||
|
||
| <artifactId>google-collections</artifactId> | ||
| <version>1.0</version> | ||
| <scope>test</scope> | ||
| </dependency> | ||
|
|
||
| </dependencies> | ||
|
|
||
| <build> | ||
| <plugins> | ||
| <plugin> | ||
| <groupId>org.apache.maven.plugins</groupId> | ||
| <artifactId>maven-surefire-plugin</artifactId> | ||
| <configuration> | ||
| <!-- Tests run with the user.timezone system property set to UTC; | ||
| presumably so date/time conversions give the same results on every machine. --> | ||
| <systemPropertyVariables> | ||
| <user.timezone>UTC</user.timezone> | ||
| </systemPropertyVariables> | ||
| </configuration> | ||
|
|
||
| </plugin> | ||
| </plugins> | ||
| </build> | ||
| </project> | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.arrow.adapter.jdbc; | ||
|
|
||
| import org.apache.arrow.memory.RootAllocator; | ||
| import org.apache.arrow.vector.VectorSchemaRoot; | ||
|
|
||
| import com.google.common.base.Preconditions; | ||
|
|
||
| import java.sql.Connection; | ||
| import java.sql.SQLException; | ||
| import java.sql.Statement; | ||
| import java.sql.ResultSet; | ||
|
|
||
| /** | ||
| * Utility class to convert JDBC objects to columnar Arrow format objects. | ||
| * | ||
| * This utility uses the following mapping from JDBC/SQL data types to Arrow data types. | ||
| * | ||
| * CHAR --> ArrowType.Utf8 | ||
| * NCHAR --> ArrowType.Utf8 | ||
| * VARCHAR --> ArrowType.Utf8 | ||
| * NVARCHAR --> ArrowType.Utf8 | ||
| * LONGVARCHAR --> ArrowType.Utf8 | ||
| * LONGNVARCHAR --> ArrowType.Utf8 | ||
| * NUMERIC --> ArrowType.Decimal(precision, scale) | ||
| * DECIMAL --> ArrowType.Decimal(precision, scale) | ||
| * BIT --> ArrowType.Bool | ||
| * TINYINT --> ArrowType.Int(8, signed) | ||
| * SMALLINT --> ArrowType.Int(16, signed) | ||
| * INTEGER --> ArrowType.Int(32, signed) | ||
| * BIGINT --> ArrowType.Int(64, signed) | ||
| * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) | ||
| * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) | ||
| * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) | ||
| * BINARY --> ArrowType.Binary | ||
| * VARBINARY --> ArrowType.Binary | ||
| * LONGVARBINARY --> ArrowType.Binary | ||
| * DATE --> ArrowType.Date(DateUnit.MILLISECOND) | ||
| * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) | ||
| * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) | ||
| * CLOB --> ArrowType.Utf8 | ||
| * BLOB --> ArrowType.Binary | ||
| * | ||
| * @since 0.10.0 | ||
| */ | ||
| public class JdbcToArrow { | ||
|
|
||
| /** | ||
| * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. | ||
| * | ||
| * @param connection Database connection to be used. This method will not close the passed connection object. Since hte caller has passed | ||
| * the connection object it's the responsibility of the caller to close or return the connection to the pool. | ||
| * @param query The DB Query to fetch the data. | ||
| * @return Arrow Data Objects {@link VectorSchemaRoot} | ||
| * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as ResultSet and Statement objects. | ||
| */ | ||
| public static VectorSchemaRoot sqlToArrow(Connection connection, String query, RootAllocator rootAllocator) throws SQLException { | ||
| Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); | ||
| Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); | ||
|
|
||
| try (Statement stmt = connection.createStatement()) { | ||
| return sqlToArrow(stmt.executeQuery(query), rootAllocator); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. | ||
| * | ||
| * @param resultSet | ||
| * @return Arrow Data Objects {@link VectorSchemaRoot} | ||
| * @throws Exception | ||
| */ | ||
| public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException { | ||
| Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); | ||
|
|
||
| RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE); | ||
| VectorSchemaRoot root = sqlToArrow(resultSet, rootAllocator); | ||
| rootAllocator.close(); | ||
|
||
| return root; | ||
| } | ||
|
|
||
| /** | ||
| * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. | ||
| * | ||
| * @param resultSet | ||
| * @return Arrow Data Objects {@link VectorSchemaRoot} | ||
| * @throws Exception | ||
| */ | ||
| public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, RootAllocator rootAllocator) throws SQLException { | ||
|
||
| Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); | ||
| Preconditions.checkNotNull(rootAllocator, "Root Allocator object can not be null"); | ||
|
|
||
| VectorSchemaRoot root = VectorSchemaRoot.create( | ||
| JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData()), rootAllocator); | ||
| JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root); | ||
| return root; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
replace with ${dep.guava.version}