-
Notifications
You must be signed in to change notification settings - Fork 421
Glue catalog commit table #140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d53785a
274b91b
8e8d39d
c3e1311
2b7a7d1
4fc25df
facb43b
ce4311f
116c6fd
66a4f46
2882d0d
8a8d4ff
70b64d8
1cfe9d2
252dc36
501e7a9
7afe318
f769101
4282d37
94cfc69
bb58e09
6d4efc8
5efb155
413935e
fe7da26
e8666dc
52ceaf8
8dfaf93
5a23638
ccb787c
8a29796
5e78af9
93f2cec
6932cee
0d38337
0ad4909
dccad75
b967284
bf06d26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |||||||
| from __future__ import annotations | ||||||||
|
|
||||||||
| import logging | ||||||||
| import re | ||||||||
| import uuid | ||||||||
| from abc import ABC, abstractmethod | ||||||||
| from dataclasses import dataclass | ||||||||
|
|
@@ -74,6 +75,17 @@ | |||||||
| LOCATION = "location" | ||||||||
| EXTERNAL_TABLE = "EXTERNAL_TABLE" | ||||||||
|
|
||||||||
| TABLE_METADATA_FILE_NAME_REGEX = re.compile( | ||||||||
| r""" | ||||||||
| (\d+) # version number | ||||||||
| - # separator | ||||||||
| ([\w-]{36}) # UUID (36 characters, including hyphens) | ||||||||
| (?:\.\w+)? # optional codec name | ||||||||
| \.metadata\.json # file extension | ||||||||
| """, | ||||||||
| re.X, | ||||||||
| ) | ||||||||
|
|
||||||||
|
|
||||||||
| class CatalogType(Enum): | ||||||||
| REST = "rest" | ||||||||
|
|
@@ -587,8 +599,38 @@ def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) -> | |||||||
| ToOutputFile.table_metadata(metadata, io.new_output(metadata_path)) | ||||||||
|
|
||||||||
| @staticmethod | ||||||||
| def _get_metadata_location(location: str) -> str: | ||||||||
| return f"{location}/metadata/00000-{uuid.uuid4()}.metadata.json" | ||||||||
| def _get_metadata_location(location: str, new_version: int = 0) -> str: | ||||||||
| if new_version < 0: | ||||||||
| raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer") | ||||||||
| version_str = f"{new_version:05d}" | ||||||||
| return f"{location}/metadata/{version_str}-{uuid.uuid4()}.metadata.json" | ||||||||
|
|
||||||||
| @staticmethod | ||||||||
| def _parse_metadata_version(metadata_location: str) -> int: | ||||||||
| """Parse the version from the metadata location. | ||||||||
|
|
||||||||
| The version is the first part of the file name, before the first dash. | ||||||||
| For example, the version of the metadata file | ||||||||
| `s3://bucket/db/tb/metadata/00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json` | ||||||||
| is 1. | ||||||||
| If the path does not comply with the pattern, the version defaults to -1, ensuring | ||||||||
| that the next metadata file is treated as having version 0. | ||||||||
|
|
||||||||
| Args: | ||||||||
| metadata_location (str): The location of the metadata file. | ||||||||
|
|
||||||||
| Returns: | ||||||||
| int: The version of the metadata file, or -1 if the file name does not contain a valid version string. | ||||||||
| """ | ||||||||
| file_name = metadata_location.split("/")[-1] | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we also gracefully return
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar reason as above: since this is an internal method, I think we can rely on the type check to ensure |
||||||||
| if file_name_match := TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name): | ||||||||
| try: | ||||||||
| uuid.UUID(file_name_match.group(2)) | ||||||||
| except ValueError: | ||||||||
| return -1 | ||||||||
| return int(file_name_match.group(1)) | ||||||||
| else: | ||||||||
| return -1 | ||||||||
|
|
||||||||
| def _get_updated_props_and_update_summary( | ||||||||
| self, current_properties: Properties, removals: Optional[Set[str]], updates: Properties | ||||||||
|
|
||||||||
Uh oh!
There was an error while loading. Please reload this page.