Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ conbench/.isort.cfg
arrow-flight/src/arrow.flight.protocol.rs
arrow-flight/src/sql/arrow.flight.protocol.sql.rs
.github/*
parquet/src/bin/parquet-fromcsv-help.txt
6 changes: 5 additions & 1 deletion parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
# Enable arrow reader/writer APIs
arrow = ["dep:arrow", "base64"]
# Enable CLI tools
cli = ["serde_json", "base64", "clap"]
cli = ["serde_json", "base64", "clap","arrow/csv"]
# Enable internal testing APIs
test_common = []
# Experimental, unstable functionality primarily used for testing
Expand All @@ -91,6 +91,10 @@ required-features = ["cli"]
name = "parquet-rowcount"
required-features = ["cli"]

[[bin]]
name = "parquet-fromcsv"
required-features = ["cli"]

[[bench]]
name = "arrow_writer"
required-features = ["arrow"]
Expand Down
67 changes: 67 additions & 0 deletions parquet/src/bin/parquet-fromcsv-help.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
parquet 15.0.0
Apache Arrow <[email protected]>
Binary to convert csv to Parquet

USAGE:
parquet [OPTIONS] --schema <SCHEMA> --input-file <INPUT_FILE> --output-file <OUTPUT_FILE>

OPTIONS:
-b, --batch-size <BATCH_SIZE>
batch size

[env: PARQUET_FROM_CSV_BATCHSIZE=]
[default: 1000]

-c, --parquet-compression <PARQUET_COMPRESSION>
compression mode

[default: SNAPPY]

-d, --delimiter <DELIMITER>
field delimiter

default value: when input_format==CSV: ',' when input_format==TSV: 'TAB'

-D, --double-quote <DOUBLE_QUOTE>
double quote

-e, --escape-char <ESCAPE_CHAR>
escape charactor

-f, --input-format <INPUT_FORMAT>
input file format

[default: csv]
[possible values: csv, tsv]

-h, --has-header
has header

--help
Print help information

-i, --input-file <INPUT_FILE>
input CSV file

-m, --max-row-group-size <MAX_ROW_GROUP_SIZE>
max row group size

-o, --output-file <OUTPUT_FILE>
output Parquet file

-q, --quote-char <QUOTE_CHAR>
quate charactor

-r, --record-terminator <RECORD_TERMINATOR>
record terminator

[possible values: lf, crlf, cr]

-s, --schema <SCHEMA>
message schema for output Parquet

-V, --version
Print version information

-w, --writer-version <WRITER_VERSION>
writer version
Loading