Skip to content

Commit

Permalink
fix EOF in gdal DuckDB FS, impl siblingfiles properly
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxxen committed Sep 27, 2024
1 parent 723430c commit bb33297
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 7 deletions.
30 changes: 26 additions & 4 deletions spatial/src/spatial/gdal/file_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace gdal {
class DuckDBFileHandle : public VSIVirtualHandle {
private:
unique_ptr<FileHandle> file_handle;
bool is_eof;

public:
explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(std::move(file_handle_p)) {
Expand All @@ -30,6 +31,8 @@ class DuckDBFileHandle : public VSIVirtualHandle {
return static_cast<vsi_l_offset>(file_handle->SeekPosition());
}
int Seek(vsi_l_offset nOffset, int nWhence) override {
is_eof = false;

if (nWhence == SEEK_SET && nOffset == 0) {
// Use the reset function instead to allow compressed file handles to rewind
// even if they don't support seeking
Expand Down Expand Up @@ -66,11 +69,22 @@ class DuckDBFileHandle : public VSIVirtualHandle {
}
} catch (...) {
}

if(remaining_bytes != 0) {
if(file_handle->SeekPosition() == file_handle->GetFileSize()) {
// Is at EOF!
is_eof = true;
}
// else, error!
// unfortunately, this version of GDAL cant distinguish between errors and reading less bytes
// its avaiable in 3.9.2, but we're stuck on 3.8.5 for now.
}

return nCount - (remaining_bytes / nSize);
}

int Eof() override {
return file_handle->SeekPosition() == file_handle->GetFileSize() ? TRUE : FALSE;
return is_eof ? TRUE : FALSE;
}

size_t Write(const void *pBuffer, size_t nSize, size_t nCount) override {
Expand Down Expand Up @@ -122,6 +136,8 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
return pszFilename + client_prefix.size();
}

string AddPrefix(const string &value) { return client_prefix + value; }

VSIVirtualHandle *Open(const char *prefixed_file_name, const char *access, bool bSetError,
CSLConstList /* papszOptions */) override {
auto file_name = StripPrefix(prefixed_file_name);
Expand Down Expand Up @@ -310,7 +326,8 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {
if (files_count >= max_files) {
return;
}
files.AddString(file_name.c_str());
const auto tmp = AddPrefix(file_name);
files.AddString(tmp.c_str());
files_count++;
});
return files.StealList();
Expand All @@ -321,9 +338,14 @@ class DuckDBFileSystemHandler : public VSIFilesystemHandler {

auto &fs = FileSystem::GetFileSystem(context);
CPLStringList files;
auto file_vector = fs.Glob(file_name);

auto file_name_without_ext = fs.JoinPath(StringUtil::GetFilePath(file_name), StringUtil::GetFileStem(file_name));
auto file_glob = file_name_without_ext + ".*";

auto file_vector = fs.Glob(file_glob);
for (auto &file : file_vector) {
files.AddString(file.c_str());
auto tmp = AddPrefix(file);
files.AddString(tmp.c_str());
}
return files.StealList();
}
Expand Down
7 changes: 4 additions & 3 deletions spatial/src/spatial/gdal/functions/st_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,16 @@ unique_ptr<FunctionData> GdalTableFunction::Bind(ClientContext &context, TableFu
}
}

// Now we can open the dataset
auto &ctx_state = GDALClientContextState::GetOrCreate(context);

auto siblings_params = input.named_parameters.find("sibling_files");
if (siblings_params != input.named_parameters.end()) {
for (auto &param : ListValue::GetChildren(siblings_params->second)) {
result->dataset_sibling_files.AddString(StringValue::Get(param).c_str());
result->dataset_sibling_files.AddString(ctx_state.GetPrefix(StringValue::Get(param)).c_str());
}
}

// Now we can open the dataset
auto &ctx_state = GDALClientContextState::GetOrCreate(context);

result->raw_file_name = input.inputs[0].GetValue<string>();
result->prefixed_file_name = ctx_state.GetPrefix(result->raw_file_name);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH], AUTHORITY["EPSG","4326"]]
Binary file not shown.
Binary file not shown.
7 changes: 7 additions & 0 deletions test/sql/gdal/gdal_shapefile.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
require spatial

# This used to fail because our GDAL filesytem wrapper was too aggressive in marking EOF
query I
SELECT COUNT(*) FROM st_read('__WORKING_DIRECTORY__/test/data/nyc_export/geo_export_42c9a823-5465-4f85-80b3-b294002094f2.shp');
----
5

0 comments on commit bb33297

Please sign in to comment.