Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
32d45bb
Add basic CSV Writer C++ Proxy class.
kevingurney Sep 13, 2023
36bff74
Add basic CSV Writer MATLAB class implementation.
kevingurney Sep 13, 2023
d9f2e30
Fix namespace and header include.
kevingurney Sep 13, 2023
ccd5ad9
Use WriterProxy type alias.
kevingurney Sep 13, 2023
67d087f
Add CSV writer source code to CMakeLists.txt.
kevingurney Sep 13, 2023
a01ffa1
Delete old Feather code for Writer properties.
kevingurney Sep 13, 2023
7140b6d
1. Rename `arrow.io.csv.Writer` to `arrow.io.csv.TableWriter`.
kevingurney Sep 13, 2023
12b4f31
Add basic CSV round-trip tests.
kevingurney Sep 15, 2023
9c88a9f
Use default values for read, write, parse, and convert options for CSV.
kevingurney Sep 15, 2023
b1a5c5c
1. Parameterize CSV tests.
kevingurney Sep 15, 2023
58a5ea3
Add verifyRoundTrip method to CSVTest class.
kevingurney Sep 15, 2023
934e9ad
Remove unused TestMethodSetup block in tError.
kevingurney Sep 15, 2023
356e2ea
1. Update arguments block type properties for TableReader and TableWr…
kevingurney Sep 18, 2023
54179e2
1. Set access for Filename property of TableWriter.
kevingurney Sep 18, 2023
6a5126c
Enable Arrow CSV functionality with `-D ARROW_CSV=ON` in MATLAB CI wo…
kevingurney Sep 18, 2023
35aefb4
Enable ARROW_CSV in ExternalProject_Add call.
kevingurney Sep 18, 2023
b3fb5dc
Enable ARROW_CSV component in ExternalProject_Add call.
kevingurney Sep 19, 2023
a843a8d
Fix CMake linting errors.
kevingurney Sep 19, 2023
67d9ff9
Use `auto` for declaring options in `TableReader::read`.
kevingurney Sep 20, 2023
73deaa6
Use `auto` keyword when initializing output stream in `TableWriter`.
kevingurney Sep 20, 2023
ecf2bee
Use `auto` keyword when declaring `WriteOptions`.
kevingurney Sep 20, 2023
aea2f39
Mark `output_stream` as `const`.
kevingurney Sep 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions matlab/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ function(build_arrow)

set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix")
set(ARROW_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-build")
set(ARROW_CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DARROW_BUILD_STATIC=OFF")
set(ARROW_CMAKE_ARGS
"-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DARROW_BUILD_STATIC=OFF" "-DARROW_CSV=ON")

add_library(arrow_shared SHARED IMPORTED)
set(ARROW_LIBRARY_TARGET arrow_shared)
Expand Down
3 changes: 3 additions & 0 deletions matlab/src/cpp/arrow/matlab/error/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ namespace arrow::matlab::error {
static const char* TABLE_INVALID_NUMERIC_COLUMN_INDEX = "arrow:tabular:table:InvalidNumericColumnIndex";
static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = "arrow:io:FailedToOpenFileForWrite";
static const char* FAILED_TO_OPEN_FILE_FOR_READ = "arrow:io:FailedToOpenFileForRead";
// Error identifiers reported by the CSV TableWriter / TableReader proxies (arrow:io:csv:*).
static const char* CSV_FAILED_TO_WRITE_TABLE = "arrow:io:csv:FailedToWriteTable";
static const char* CSV_FAILED_TO_CREATE_TABLE_READER = "arrow:io:csv:FailedToCreateTableReader";
static const char* CSV_FAILED_TO_READ_TABLE = "arrow:io:csv:FailedToReadTable";
static const char* FEATHER_FAILED_TO_WRITE_TABLE = "arrow:io:feather:FailedToWriteTable";
static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch";
static const char* FEATHER_FAILED_TO_CREATE_READER = "arrow:io:feather:FailedToCreateReader";
Expand Down
93 changes: 93 additions & 0 deletions matlab/src/cpp/arrow/matlab/io/csv/proxy/table_reader.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "libmexclass/proxy/ProxyManager.h"

#include "arrow/matlab/error/error.h"
#include "arrow/matlab/io/csv/proxy/table_reader.h"
#include "arrow/matlab/tabular/proxy/table.h"

#include "arrow/util/utf8.h"

#include "arrow/result.h"

#include "arrow/io/file.h"
#include "arrow/io/interfaces.h"
#include "arrow/csv/reader.h"
#include "arrow/table.h"

namespace arrow::matlab::io::csv::proxy {

// Constructs a CSV TableReader proxy for the given (UTF-8) file name and
// registers the proxy methods `getFilename` and `read`.
TableReader::TableReader(const std::string& filename) : filename(filename) {
    REGISTER_METHOD(TableReader, getFilename);
    REGISTER_METHOD(TableReader, read);
}

// Factory used to create a TableReader proxy from constructor arguments.
//
// The first constructor argument is treated as a struct array whose first
// element carries a "Filename" string field. That string is materialized as
// UTF-16 and converted to UTF-8 before being handed to the constructor; a
// failed conversion is reported with UNICODE_CONVERSION_ERROR_ID.
libmexclass::proxy::MakeResult TableReader::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
    namespace mda = ::matlab::data;

    mda::StructArray ctor_struct = constructor_arguments[0];
    const mda::StringArray filename_field = ctor_struct[0]["Filename"];

    const auto utf16_path = std::u16string(filename_field[0]);
    MATLAB_ASSIGN_OR_ERROR(const auto utf8_path, arrow::util::UTF16StringToUTF8(utf16_path), error::UNICODE_CONVERSION_ERROR_ID);

    // Inside a member function the injected class name denotes this proxy type.
    return std::make_shared<TableReader>(utf8_path);
}

// Reads the CSV file named by `filename` into an Arrow table.
//
// Steps: open the file as a readable stream, create an Arrow CSV table reader
// with default read/parse/convert options, read the whole table, wrap it in a
// Table proxy managed by the ProxyManager, and return the proxy ID as
// context.outputs[0]. Each fallible step reports its own error ID through the
// context.
void TableReader::read(libmexclass::proxy::method::Context& context) {
    namespace mda = ::matlab::data;
    using namespace libmexclass::proxy;
    namespace csv = ::arrow::csv;
    using TableProxy = arrow::matlab::tabular::proxy::Table;

    // Open the file for reading.
    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto input_file,
                                        arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()),
                                        context,
                                        error::FAILED_TO_OPEN_FILE_FOR_READ);

    // Build the CSV reader on top of the stream; all options are the Arrow defaults.
    const ::arrow::io::IOContext io_context;
    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto csv_reader,
                                        csv::TableReader::Make(io_context,
                                                               input_file,
                                                               csv::ReadOptions::Defaults(),
                                                               csv::ParseOptions::Defaults(),
                                                               csv::ConvertOptions::Defaults()),
                                        context,
                                        error::CSV_FAILED_TO_CREATE_TABLE_READER);

    // Materialize the file contents as a Table.
    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto arrow_table, csv_reader->Read(), context, error::CSV_FAILED_TO_READ_TABLE);

    // Hand the table to a managed proxy and return its ID to the caller.
    const auto proxy_id = ProxyManager::manageProxy(std::make_shared<TableProxy>(arrow_table));

    mda::ArrayFactory factory;
    context.outputs[0] = factory.createScalar(proxy_id);
}

// Returns the stored file name as a UTF-16 scalar in context.outputs[0].
// A failed UTF-8 -> UTF-16 conversion is reported with
// UNICODE_CONVERSION_ERROR_ID through the context.
void TableReader::getFilename(libmexclass::proxy::method::Context& context) {
    namespace mda = ::matlab::data;

    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_name,
                                        arrow::util::UTF8StringToUTF16(filename),
                                        context,
                                        error::UNICODE_CONVERSION_ERROR_ID);

    mda::ArrayFactory factory;
    context.outputs[0] = factory.createScalar(utf16_name);
}

}
38 changes: 38 additions & 0 deletions matlab/src/cpp/arrow/matlab/io/csv/proxy/table_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "libmexclass/proxy/Proxy.h"

namespace arrow::matlab::io::csv::proxy {

class TableReader : public libmexclass::proxy::Proxy {
public:
TableReader(const std::string& filename);
~TableReader() {}
static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);

protected:
void read(libmexclass::proxy::method::Context& context);
void getFilename(libmexclass::proxy::method::Context& context);

private:
const std::string filename;
};

}
86 changes: 86 additions & 0 deletions matlab/src/cpp/arrow/matlab/io/csv/proxy/table_writer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/matlab/io/csv/proxy/table_writer.h"
#include "arrow/matlab/tabular/proxy/table.h"
#include "arrow/matlab/error/error.h"

#include "arrow/result.h"
#include "arrow/table.h"
#include "arrow/util/utf8.h"

#include "arrow/io/file.h"
#include "arrow/csv/writer.h"
#include "arrow/csv/options.h"

#include "libmexclass/proxy/ProxyManager.h"

namespace arrow::matlab::io::csv::proxy {

// Constructs a CSV TableWriter proxy for the given (UTF-8) file name and
// registers the proxy methods `write` and `getFilename`.
TableWriter::TableWriter(const std::string& filename) : filename(filename) {
    REGISTER_METHOD(TableWriter, write);
    REGISTER_METHOD(TableWriter, getFilename);
}

// Factory used to create a TableWriter proxy from constructor arguments.
//
// The first constructor argument is treated as a struct array whose first
// element carries a "Filename" string field. That string is materialized as
// UTF-16 and converted to UTF-8 before being handed to the constructor; a
// failed conversion is reported with UNICODE_CONVERSION_ERROR_ID.
libmexclass::proxy::MakeResult TableWriter::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
    namespace mda = ::matlab::data;

    mda::StructArray ctor_struct = constructor_arguments[0];
    const mda::StringArray filename_field = ctor_struct[0]["Filename"];

    const auto utf16_path = std::u16string(filename_field[0]);
    MATLAB_ASSIGN_OR_ERROR(const auto utf8_path, arrow::util::UTF16StringToUTF8(utf16_path), error::UNICODE_CONVERSION_ERROR_ID);

    // Inside a member function the injected class name denotes this proxy type.
    return std::make_shared<TableWriter>(utf8_path);
}

// Returns the stored file name as a UTF-16 scalar in context.outputs[0].
// A failed UTF-8 -> UTF-16 conversion is reported with
// UNICODE_CONVERSION_ERROR_ID through the context.
void TableWriter::getFilename(libmexclass::proxy::method::Context& context) {
    namespace mda = ::matlab::data;

    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16, arrow::util::UTF8StringToUTF16(filename), context, error::UNICODE_CONVERSION_ERROR_ID);

    mda::ArrayFactory factory;
    context.outputs[0] = factory.createScalar(filename_utf16);
}

// Writes the Arrow table identified by the "TableProxyID" input to the CSV
// file named by `filename`, using the default CSV write options.
//
// Inputs:  context.inputs[0] is a struct array whose first element has a
//          uint64 "TableProxyID" field identifying a managed Table proxy.
// Errors:  FAILED_TO_OPEN_FILE_FOR_WRITE if the output file cannot be opened;
//          CSV_FAILED_TO_WRITE_TABLE if writing or closing the file fails.
void TableWriter::write(libmexclass::proxy::method::Context& context) {
    namespace csv = ::arrow::csv;
    namespace mda = ::matlab::data;
    using TableProxy = ::arrow::matlab::tabular::proxy::Table;

    mda::StructArray opts = context.inputs[0];
    const mda::TypedArray<uint64_t> table_proxy_id_mda = opts[0]["TableProxyID"];
    const uint64_t table_proxy_id = table_proxy_id_mda[0];

    // NOTE(review): the proxy ID is assumed to refer to a live Table proxy;
    // the cast below is unchecked.
    const auto proxy = libmexclass::proxy::ProxyManager::getProxy(table_proxy_id);
    const auto table_proxy = std::static_pointer_cast<TableProxy>(proxy);
    const auto table = table_proxy->unwrap();

    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto output_stream,
                                        arrow::io::FileOutputStream::Open(filename),
                                        context,
                                        error::FAILED_TO_OPEN_FILE_FOR_WRITE);
    const auto options = csv::WriteOptions::Defaults();
    MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(csv::WriteCSV(*table, options, output_stream.get()),
                                        context,
                                        error::CSV_FAILED_TO_WRITE_TABLE);

    // Close explicitly so that a failure while closing the file is reported
    // to the caller instead of being dropped when the stream goes out of scope.
    MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(output_stream->Close(),
                                        context,
                                        error::CSV_FAILED_TO_WRITE_TABLE);
}
}
38 changes: 38 additions & 0 deletions matlab/src/cpp/arrow/matlab/io/csv/proxy/table_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "libmexclass/proxy/Proxy.h"

namespace arrow::matlab::io::csv::proxy {

class TableWriter : public libmexclass::proxy::Proxy {
public:
TableWriter(const std::string& filename);
~TableWriter() {}
static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);

protected:
void getFilename(libmexclass::proxy::method::Context& context);
void write(libmexclass::proxy::method::Context& context);

private:
const std::string filename;
};

}
4 changes: 4 additions & 0 deletions matlab/src/cpp/arrow/matlab/proxy/factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include "arrow/matlab/type/proxy/field.h"
#include "arrow/matlab/io/feather/proxy/writer.h"
#include "arrow/matlab/io/feather/proxy/reader.h"
#include "arrow/matlab/io/csv/proxy/table_writer.h"
#include "arrow/matlab/io/csv/proxy/table_reader.h"

#include "factory.h"

Expand Down Expand Up @@ -85,6 +87,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name,
REGISTER_PROXY(arrow.type.proxy.StructType , arrow::matlab::type::proxy::StructType);
REGISTER_PROXY(arrow.io.feather.proxy.Writer , arrow::matlab::io::feather::proxy::Writer);
REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader);
REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter);
REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader);

return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name};
};
Expand Down
51 changes: 51 additions & 0 deletions matlab/src/matlab/+arrow/+io/+csv/TableReader.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
%TABLEREADER Reads tabular data from a CSV file into an arrow.tabular.Table.

% Licensed to the Apache Software Foundation (ASF) under one or more
% contributor license agreements. See the NOTICE file distributed with
% this work for additional information regarding copyright ownership.
% The ASF licenses this file to you under the Apache License, Version
% 2.0 (the "License"); you may not use this file except in compliance
% with the License. You may obtain a copy of the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS,
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
% implied. See the License for the specific language governing
% permissions and limitations under the License.

classdef TableReader

    properties (GetAccess=public, SetAccess=private, Hidden)
        % Proxy object created by arrow.internal.proxy.create for
        % "arrow.io.csv.proxy.TableReader".
        Proxy
    end

    properties (Dependent, SetAccess=private, GetAccess=public)
        % Name of the CSV file, as reported by the proxy.
        Filename
    end

    methods

        function obj = TableReader(filename)
            %TABLEREADER Construct a reader for the CSV file FILENAME.
            %   FILENAME must be a nonmissing, nonempty string scalar.
            arguments
                filename (1, 1) string {mustBeNonmissing, mustBeNonzeroLengthText}
            end

            proxyName = "arrow.io.csv.proxy.TableReader";
            proxyArgs = struct(Filename=filename);
            obj.Proxy = arrow.internal.proxy.create(proxyName, proxyArgs);
        end

        function table = read(obj)
            %READ Read the CSV file and return it as an arrow.tabular.Table.

            % The proxy returns the ID of a Table proxy; wrap that ID in a
            % libmexclass proxy handle before building the Table.
            id = obj.Proxy.read();
            tableProxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.Table", ID=id);
            table = arrow.tabular.Table(tableProxy);
        end

        function filename = get.Filename(obj)
            % Forward the query to the proxy, which stores the file name.
            filename = obj.Proxy.getFilename();
        end

    end

end
Loading