Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python: [3.7, 3.8, 3.9]
python: [3.8, 3.9]

steps:
- uses: actions/checkout@v2
Expand Down
57 changes: 52 additions & 5 deletions python/src/iceberg/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,49 @@
"""

from abc import ABC, abstractmethod
from typing import Union
from typing import Protocol, Union, runtime_checkable


@runtime_checkable
class InputStream(Protocol): # pragma: no cover
"""A protocol for the file-like object returned by InputFile.open(...)

This outlines the minimally required methods for a seekable input stream returned from an InputFile
implementation's `open(...)` method. These methods are a subset of IOBase/RawIOBase.
"""

def read(self, size: int) -> bytes:
...

def seek(self, offset: int, whence: int) -> None:
...

def tell(self) -> int:
...

def closed(self) -> bool:
...

def close(self) -> None:
...


@runtime_checkable
class OutputStream(Protocol): # pragma: no cover
"""A protocol for the file-like object returned by OutputFile.create(...)

This outlines the minimally required methods for a writable output stream returned from an OutputFile
implementation's `create(...)` method. These methods are a subset of IOBase/RawIOBase.
"""

def write(self, b: bytes) -> None:
...

def closed(self) -> bool:
...

def close(self) -> None:
...


class InputFile(ABC):
Expand All @@ -48,8 +90,11 @@ def exists(self) -> bool:
"""Checks whether the file exists"""

@abstractmethod
def open(self):
"""This method should return an instance of an seekable input stream."""
def open(self) -> InputStream:
"""This method should return an object that matches the InputStream protocol

If a file does not exist at `self.location`, this should raise a FileNotFoundError.
"""


class OutputFile(ABC):
Expand Down Expand Up @@ -77,8 +122,8 @@ def to_input_file(self) -> InputFile:
"""Returns an InputFile for the location of this output file"""

@abstractmethod
def create(self, overwrite: bool = False):
"""This method should return a file-like object.
def create(self, overwrite: bool = False) -> OutputStream:
"""This method should return an object that matches the OutputStream protocol.

Args:
overwrite(bool): If the file already exists at `self.location`
Expand All @@ -87,6 +132,8 @@ def create(self, overwrite: bool = False):


class FileIO(ABC):
"""A base class for FileIO implementations"""

@abstractmethod
def new_input(self, location: str) -> InputFile:
"""Get an InputFile instance to read bytes from the file at the given location"""
Expand Down
16 changes: 11 additions & 5 deletions python/tests/io/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import pytest

from iceberg.io.base import FileIO, InputFile, OutputFile
from iceberg.io.base import FileIO, InputFile, InputStream, OutputFile, OutputStream


class LocalInputFile(InputFile):
Expand All @@ -49,8 +49,11 @@ def __len__(self):
def exists(self):
return os.path.exists(self.parsed_location.path)

def open(self):
return open(self.parsed_location.path, "rb")
def open(self) -> InputStream:
input_file = open(self.parsed_location.path, "rb")
if not isinstance(input_file, InputStream):
raise RuntimeError("""Object returned from LocalInputFile.open does not match the OutputStream protocol.""")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

small nit: TypeError is better here but since it is a test it is minor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

return input_file


class LocalOutputFile(OutputFile):
Expand Down Expand Up @@ -80,8 +83,11 @@ def exists(self):
def to_input_file(self):
return LocalInputFile(location=self.location)

def create(self, overwrite: bool = False) -> None:
return open(self.parsed_location.path, "wb" if overwrite else "xb")
def create(self, overwrite: bool = False) -> OutputStream:
output_file = open(self.parsed_location.path, "wb" if overwrite else "xb")
if not isinstance(output_file, OutputStream):
raise RuntimeError("""Object returned from LocalOutputFile.create does not match the OutputStream protocol.""")
return output_file


class LocalFileIO(FileIO):
Expand Down
2 changes: 1 addition & 1 deletion python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

[tox]
envlist = py37,py38,py39,linters
envlist = py38,py39,linters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1 year 4 months seems like quite a long time from now, do we really want to drop support?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, but Protocol was added in 3.8. If we're really aiming to use this for documentation purposes only and don't actually need the functionality of structural subtyping, I can change these to normal classes and include "These are Protocols for documentation purposes only" in the docstrings. How does that sound?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the backport not useable to keep 3.7?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're absolutely right, I was getting an import error and thought it wasn't in the backport, but I was expecting an override of the normal typing import line. I'll update this to use from typing_extensions and add back 3.7. Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated!


[testenv]
usedevelop = true
Expand Down