diff --git a/.github/workflows/open-api.yml b/.github/workflows/open-api.yml index e1703ce4777d..404eb13c2714 100644 --- a/.github/workflows/open-api.yml +++ b/.github/workflows/open-api.yml @@ -45,4 +45,4 @@ jobs: python-version: 3.9 - name: Install working-directory: ./open-api - run: pip install openapi-spec-validator && openapi-spec-validator rest-catalog-open-api.yaml \ No newline at end of file + run: pip install openapi-spec-validator && openapi-spec-validator rest-catalog-open-api.yaml diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml index 9a219be8cc50..717b4297832b 100644 --- a/python/.pre-commit-config.yaml +++ b/python/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: rev: v0.971 hooks: - id: mypy - args: [--config=python/pyproject.toml] + args: [ --install-types, --non-interactive, --config=python/pyproject.toml] - repo: https://github.com/hadialqattan/pycln rev: v2.0.4 hooks: diff --git a/python/poetry.lock b/python/poetry.lock index 2296e0362532..d95bd8729bb9 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -20,6 +20,14 @@ docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] +[[package]] +name = "certifi" +version = "2022.6.15" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "cffi" version = "1.15.1" @@ -39,6 +47,17 @@ category = "dev" optional = false python-versions = ">=3.6.1" +[[package]] +name = "charset-normalizer" +version = "2.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + [[package]] name = "colorama" version = "0.4.5" @@ -114,6 +133,14 @@ python-versions = ">=3.7" [package.extras] license = ["ukkonen"] +[[package]] +name = "idna" +version = "3.3" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "importlib-metadata" version = "4.12.0" @@ -205,8 +232,8 @@ optional = false python-versions = ">=3.6" [package.extras] -testing = ["pytest-benchmark", "pytest"] -dev = ["tox", "pre-commit"] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "pre-commit" @@ -331,6 +358,40 @@ category = "dev" optional = false python-versions = ">=3.6" +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-mock" +version = "1.9.3" +description = "Mock out responses from the requests package" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.3,<3" +six = "*" + +[package.extras] +fixture = ["fixtures"] +test = ["fixtures", "mock", "purl", "pytest", "sphinx", "testrepository (>=0.0.18)", "testtools"] + [[package]] name = "six" version = "1.16.0" @@ -363,6 +424,19 @@ category = "main" optional = false python-versions = ">=3.7" +[[package]] +name = "urllib3" +version = "1.26.10" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [[package]] name = "virtualenv" version = "20.15.1" @@ -415,16 +489,18 @@ snappy = ["python-snappy"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "906929f3e98c56eb63e4175d071dd366746751e9357a45d18a223d68ece38498" +content-hash = "ff76c4a6349ee3a909543aa6587ace0ab6795e08e97e892a90553b6fd6a589af" [metadata.files] -atomicwrites = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] +atomicwrites = [] attrs = [ {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, ] +certifi = [ + {file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"}, + {file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"}, +] cffi = [ {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, @@ -495,87 +571,29 @@ cfgv = [ {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, ] +charset-normalizer = [ + {file = 
"charset-normalizer-2.1.0.tar.gz", hash = "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"}, + {file = "charset_normalizer-2.1.0-py3-none-any.whl", hash = "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5"}, +] colorama = [ {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, ] -coverage = [ - {file = "coverage-6.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a9032f9b7d38bdf882ac9f66ebde3afb8145f0d4c24b2e600bc4c6304aafb87e"}, - {file = "coverage-6.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e0524adb49c716ca763dbc1d27bedce36b14f33e6b8af6dba56886476b42957c"}, - {file = "coverage-6.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4548be38a1c810d79e097a38107b6bf2ff42151900e47d49635be69943763d8"}, - {file = "coverage-6.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f23876b018dfa5d3e98e96f5644b109090f16a4acb22064e0f06933663005d39"}, - {file = "coverage-6.4.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fe75dcfcb889b6800f072f2af5a331342d63d0c1b3d2bf0f7b4f6c353e8c9c0"}, - {file = "coverage-6.4.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2f8553878a24b00d5ab04b7a92a2af50409247ca5c4b7a2bf4eabe94ed20d3ee"}, - {file = "coverage-6.4.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:d774d9e97007b018a651eadc1b3970ed20237395527e22cbeb743d8e73e0563d"}, - {file = "coverage-6.4.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d56f105592188ce7a797b2bd94b4a8cb2e36d5d9b0d8a1d2060ff2a71e6b9bbc"}, - {file = "coverage-6.4.2-cp310-cp310-win32.whl", hash = "sha256:d230d333b0be8042ac34808ad722eabba30036232e7a6fb3e317c49f61c93386"}, - {file = 
"coverage-6.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:5ef42e1db047ca42827a85e34abe973971c635f83aed49611b7f3ab49d0130f0"}, - {file = "coverage-6.4.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:25b7ec944f114f70803d6529394b64f8749e93cbfac0fe6c5ea1b7e6c14e8a46"}, - {file = "coverage-6.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bb00521ab4f99fdce2d5c05a91bddc0280f0afaee0e0a00425e28e209d4af07"}, - {file = "coverage-6.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dff52b3e7f76ada36f82124703f4953186d9029d00d6287f17c68a75e2e6039"}, - {file = "coverage-6.4.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:147605e1702d996279bb3cc3b164f408698850011210d133a2cb96a73a2f7996"}, - {file = "coverage-6.4.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:422fa44070b42fef9fb8dabd5af03861708cdd6deb69463adc2130b7bf81332f"}, - {file = "coverage-6.4.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8af6c26ba8df6338e57bedbf916d76bdae6308e57fc8f14397f03b5da8622b4e"}, - {file = "coverage-6.4.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5336e0352c0b12c7e72727d50ff02557005f79a0b8dcad9219c7c4940a930083"}, - {file = "coverage-6.4.2-cp37-cp37m-win32.whl", hash = "sha256:0f211df2cba951ffcae210ee00e54921ab42e2b64e0bf2c0befc977377fb09b7"}, - {file = "coverage-6.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a13772c19619118903d65a91f1d5fea84be494d12fd406d06c849b00d31bf120"}, - {file = "coverage-6.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f7bd0ffbcd03dc39490a1f40b2669cc414fae0c4e16b77bb26806a4d0b7d1452"}, - {file = "coverage-6.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0895ea6e6f7f9939166cc835df8fa4599e2d9b759b02d1521b574e13b859ac32"}, - {file = "coverage-6.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d4e7ced84a11c10160c0697a6cc0b214a5d7ab21dfec1cd46e89fbf77cc66fae"}, - {file = "coverage-6.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80db4a47a199c4563d4a25919ff29c97c87569130375beca3483b41ad5f698e8"}, - {file = "coverage-6.4.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3def6791adf580d66f025223078dc84c64696a26f174131059ce8e91452584e1"}, - {file = "coverage-6.4.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4f89d8e03c8a3757aae65570d14033e8edf192ee9298303db15955cadcff0c63"}, - {file = "coverage-6.4.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6d0b48aff8e9720bdec315d67723f0babd936a7211dc5df453ddf76f89c59933"}, - {file = "coverage-6.4.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2b20286c2b726f94e766e86a3fddb7b7e37af5d0c635bdfa7e4399bc523563de"}, - {file = "coverage-6.4.2-cp38-cp38-win32.whl", hash = "sha256:d714af0bdba67739598849c9f18efdcc5a0412f4993914a0ec5ce0f1e864d783"}, - {file = "coverage-6.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:5f65e5d3ff2d895dab76b1faca4586b970a99b5d4b24e9aafffc0ce94a6022d6"}, - {file = "coverage-6.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a697977157adc052284a7160569b36a8bbec09db3c3220642e6323b47cec090f"}, - {file = "coverage-6.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c77943ef768276b61c96a3eb854eba55633c7a3fddf0a79f82805f232326d33f"}, - {file = "coverage-6.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54d8d0e073a7f238f0666d3c7c0d37469b2aa43311e4024c925ee14f5d5a1cbe"}, - {file = "coverage-6.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f22325010d8824594820d6ce84fa830838f581a7fd86a9235f0d2ed6deb61e29"}, - {file = "coverage-6.4.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:24b04d305ea172ccb21bee5bacd559383cba2c6fcdef85b7701cf2de4188aa55"}, - {file = "coverage-6.4.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:866ebf42b4c5dbafd64455b0a1cd5aa7b4837a894809413b930026c91e18090b"}, - {file = "coverage-6.4.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e36750fbbc422c1c46c9d13b937ab437138b998fe74a635ec88989afb57a3978"}, - {file = "coverage-6.4.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:79419370d6a637cb18553ecb25228893966bd7935a9120fa454e7076f13b627c"}, - {file = "coverage-6.4.2-cp39-cp39-win32.whl", hash = "sha256:b5e28db9199dd3833cc8a07fa6cf429a01227b5d429facb56eccd765050c26cd"}, - {file = "coverage-6.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:edfdabe7aa4f97ed2b9dd5dde52d2bb29cb466993bb9d612ddd10d0085a683cf"}, - {file = "coverage-6.4.2-pp36.pp37.pp38-none-any.whl", hash = "sha256:e2618cb2cf5a7cc8d698306e42ebcacd02fb7ef8cfc18485c59394152c70be97"}, - {file = "coverage-6.4.2.tar.gz", hash = "sha256:6c3ccfe89c36f3e5b9837b9ee507472310164f352c9fe332120b764c9d60adbe"}, -] -distlib = [ - {file = "distlib-0.3.5-py2.py3-none-any.whl", hash = "sha256:b710088c59f06338ca514800ad795a132da19fda270e3ce4affc74abf955a26c"}, - {file = "distlib-0.3.5.tar.gz", hash = "sha256:a7f75737c70be3b25e2bee06288cec4e4c221de18455b2dd037fe2a795cab2fe"}, -] +coverage = [] +distlib = [] docutils = [ {file = "docutils-0.19-py3-none-any.whl", hash = "sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc"}, {file = "docutils-0.19.tar.gz", hash = "sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6"}, ] -fastavro = [ - {file = "fastavro-1.5.3-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:bcaf1c9ead0aa1c7306cff229f2c207d714f7f765e0bb010e64c74d379008555"}, - {file = "fastavro-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ce66549bdbc9d43f40e1858796c1db59fa9f63f2368217071929c39f7ee7c1e"}, - {file = 
"fastavro-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c14c72f79bcb689edea06ec276bfd5c4d4f17b8e32f545e32a6ee108c5f09de"}, - {file = "fastavro-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:6ad6af14d5e927e0c5fbc5dcb7c7030ef8759c8011620344a57252c263d6dd5a"}, - {file = "fastavro-1.5.3-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:3a4fa9f72c9b134fcdb0ee30d4a838f1d30656ece72a95f18cc74db10148ee55"}, - {file = "fastavro-1.5.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8980f2bc1230aac509273938d23c544283a476bf84ac79b80540d503f941fe5"}, - {file = "fastavro-1.5.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78ffb34790791a82edef9cce7e49e44faf1b6b0b1da12e9c7d45f31fd6655a7b"}, - {file = "fastavro-1.5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:86a33b2f819cad67f002250e9c6a62909fc31e048679da5f2ac02f4bbf0a5998"}, - {file = "fastavro-1.5.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:c5043cfbdfed27f1331860f1d215db838b92d575e8a78915c52c3f6c64d42176"}, - {file = "fastavro-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:badcbd1c34f446f4b64ade84f9517ef7d52197e0ea9dd6b3775274f2271dba9a"}, - {file = "fastavro-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:259ade020a70870858b656219d25f566eb620e52967070f9e18b040f1c0cd482"}, - {file = "fastavro-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:b0408d6b02abcbd4e8866b6b027d7cdd20d570e78177fb7c363fb3cc8ecb02a3"}, - {file = "fastavro-1.5.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:063f17e000b7a4174ad362ffc91b75de85970cc67fccfed2bd12e21908de6028"}, - {file = "fastavro-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b399317d4f559d8f3ef695ef8db8ea230e4b489437d2a1e50307120e181e831"}, - {file = "fastavro-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:298e98c55e98f579df66e8258a942494a64b5340212968ed1b6bd9a68c52db5f"}, - {file = "fastavro-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:14c23dc870785d860f1523e7be7ce4079dc36070f1bad18139304a5f4d654180"}, - {file = "fastavro-1.5.3.tar.gz", hash = "sha256:262a9f629448a0d4941a4871fd958b70a01f48a0106644b40e4fbaf984f5e89e"}, -] +fastavro = [] filelock = [ {file = "filelock-3.7.1-py3-none-any.whl", hash = "sha256:37def7b658813cda163b56fc564cdc75e86d338246458c4c28ae84cabefa2404"}, {file = "filelock-3.7.1.tar.gz", hash = "sha256:3a0fd85166ad9dbab54c9aec96737b744106dc5f15c0b09a6744a445299fcf04"}, ] -identify = [ - {file = "identify-2.5.2-py2.py3-none-any.whl", hash = "sha256:feaa9db2dc0ce333b453ce171c0cf1247bbfde2c55fc6bb785022d411a1b78b5"}, - {file = "identify-2.5.2.tar.gz", hash = "sha256:a3d4c096b384d50d5e6dc5bc8b9bc44f1f61cefebd750a7b3e9f939b53fb214d"}, +identify = [] +idna = [ + {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, + {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, ] importlib-metadata = [ {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, @@ -617,30 +635,7 @@ nodeenv = [ {file = "nodeenv-1.7.0-py2.py3-none-any.whl", hash = "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e"}, {file = "nodeenv-1.7.0.tar.gz", hash = "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b"}, ] -numpy = [ - {file = "numpy-1.23.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b15c3f1ed08df4980e02cc79ee058b788a3d0bef2fb3c9ca90bb8cbd5b8a3a04"}, - {file = "numpy-1.23.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ce242162015b7e88092dccd0e854548c0926b75c7924a3495e02c6067aba1f5"}, - {file = "numpy-1.23.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e0d7447679ae9a7124385ccf0ea990bb85bb869cef217e2ea6c844b6a6855073"}, - {file = "numpy-1.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3119daed207e9410eaf57dcf9591fdc68045f60483d94956bee0bfdcba790953"}, - {file = "numpy-1.23.1-cp310-cp310-win32.whl", hash = "sha256:3ab67966c8d45d55a2bdf40701536af6443763907086c0a6d1232688e27e5447"}, - {file = "numpy-1.23.1-cp310-cp310-win_amd64.whl", hash = "sha256:1865fdf51446839ca3fffaab172461f2b781163f6f395f1aed256b1ddc253622"}, - {file = "numpy-1.23.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeba539285dcf0a1ba755945865ec61240ede5432df41d6e29fab305f4384db2"}, - {file = "numpy-1.23.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7e8229f3687cdadba2c4faef39204feb51ef7c1a9b669247d49a24f3e2e1617c"}, - {file = "numpy-1.23.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68b69f52e6545af010b76516f5daaef6173e73353e3295c5cb9f96c35d755641"}, - {file = "numpy-1.23.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1408c3527a74a0209c781ac82bde2182b0f0bf54dea6e6a363fe0cc4488a7ce7"}, - {file = "numpy-1.23.1-cp38-cp38-win32.whl", hash = "sha256:47f10ab202fe4d8495ff484b5561c65dd59177949ca07975663f4494f7269e3e"}, - {file = "numpy-1.23.1-cp38-cp38-win_amd64.whl", hash = "sha256:37e5ebebb0eb54c5b4a9b04e6f3018e16b8ef257d26c8945925ba8105008e645"}, - {file = "numpy-1.23.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:173f28921b15d341afadf6c3898a34f20a0569e4ad5435297ba262ee8941e77b"}, - {file = "numpy-1.23.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:876f60de09734fbcb4e27a97c9a286b51284df1326b1ac5f1bf0ad3678236b22"}, - {file = "numpy-1.23.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35590b9c33c0f1c9732b3231bb6a72d1e4f77872390c47d50a615686ae7ed3fd"}, - {file = "numpy-1.23.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a35c4e64dfca659fe4d0f1421fc0f05b8ed1ca8c46fb73d9e5a7f175f85696bb"}, - {file = "numpy-1.23.1-cp39-cp39-win32.whl", hash = "sha256:c2f91f88230042a130ceb1b496932aa717dcbd665350beb821534c5c7e15881c"}, - {file = "numpy-1.23.1-cp39-cp39-win_amd64.whl", hash = "sha256:37ece2bd095e9781a7156852e43d18044fd0d742934833335599c583618181b9"}, - {file = "numpy-1.23.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8002574a6b46ac3b5739a003b5233376aeac5163e5dcd43dd7ad062f3e186129"}, - {file = "numpy-1.23.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d732d17b8a9061540a10fda5bfeabca5785700ab5469a5e9b93aca5e2d3a5fb"}, - {file = "numpy-1.23.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:55df0f7483b822855af67e38fb3a526e787adf189383b4934305565d71c4b148"}, - {file = "numpy-1.23.1.tar.gz", hash = "sha256:d748ef349bfef2e1194b59da37ed5a29c19ea8d7e6342019921ba2ba4fd8b624"}, -] +numpy = [] packaging = [ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, @@ -657,10 +652,7 @@ pluggy = [ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, ] -pre-commit = [ - {file = "pre_commit-2.20.0-py2.py3-none-any.whl", hash = "sha256:51a5ba7c480ae8072ecdb6933df22d2f812dc897d5fe848778116129a681aac7"}, - {file = "pre_commit-2.20.0.tar.gz", hash = "sha256:a978dac7bc9ec0bcee55c18a277d553b0f419d259dadb4b9418ff2d00eb43959"}, -] +pre-commit = [] py = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 
@@ -835,6 +827,11 @@ pyyaml = [ {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, ] +requests = [ + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, +] +requests-mock = [] six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -851,57 +848,10 @@ typing-extensions = [ {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, ] +urllib3 = [] virtualenv = [ {file = "virtualenv-20.15.1-py2.py3-none-any.whl", hash = "sha256:b30aefac647e86af6d82bfc944c556f8f1a9c90427b2fb4e3bfbf338cb82becf"}, {file = "virtualenv-20.15.1.tar.gz", hash = "sha256:288171134a2ff3bfb1a2f54f119e77cd1b81c29fc1265a2356f3e8d14c7d58c4"}, ] -zipp = [ - {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, - {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, -] -zstandard = [ - {file = "zstandard-0.18.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef7e8a200e4c8ac9102ed3c90ed2aa379f6b880f63032200909c1be21951f556"}, - {file = "zstandard-0.18.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2dc466207016564805e56d28375f4f533b525ff50d6776946980dff5465566ac"}, - {file = 
"zstandard-0.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a2ee1d4f98447f3e5183ecfce5626f983504a4a0c005fbe92e60fa8e5d547ec"}, - {file = "zstandard-0.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d956e2f03c7200d7e61345e0880c292783ec26618d0d921dcad470cb195bbce2"}, - {file = "zstandard-0.18.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ce6f59cba9854fd14da5bfe34217a1501143057313966637b7291d1b0267bd1e"}, - {file = "zstandard-0.18.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7fa67cba473623848b6e88acf8d799b1906178fd883fb3a1da24561c779593b"}, - {file = "zstandard-0.18.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdb44d7284c8c5dd1b66dfb86dda7f4560fa94bfbbc1d2da749ba44831335e32"}, - {file = "zstandard-0.18.0-cp310-cp310-win32.whl", hash = "sha256:63694a376cde0aa8b1971d06ca28e8f8b5f492779cb6ee1cc46bbc3f019a42a5"}, - {file = "zstandard-0.18.0-cp310-cp310-win_amd64.whl", hash = "sha256:702a8324cd90c74d9c8780d02bf55e79da3193c870c9665ad3a11647e3ad1435"}, - {file = "zstandard-0.18.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:46f679bc5dfd938db4fb058218d9dc4db1336ffaf1ea774ff152ecadabd40805"}, - {file = "zstandard-0.18.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc2a4de9f363b3247d472362a65041fe4c0f59e01a2846b15d13046be866a885"}, - {file = "zstandard-0.18.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd3220d7627fd4d26397211cb3b560ec7cc4a94b75cfce89e847e8ce7fabe32d"}, - {file = "zstandard-0.18.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:39e98cf4773234bd9cebf9f9db730e451dfcfe435e220f8921242afda8321887"}, - {file = 
"zstandard-0.18.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5228e596eb1554598c872a337bbe4e5afe41cd1f8b1b15f2e35b50d061e35244"}, - {file = "zstandard-0.18.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d4a8fd45746a6c31e729f35196e80b8f1e9987c59f5ccb8859d7c6a6fbeb9c63"}, - {file = "zstandard-0.18.0-cp36-cp36m-win32.whl", hash = "sha256:4cbb85f29a990c2fdbf7bc63246567061a362ddca886d7fae6f780267c0a9e67"}, - {file = "zstandard-0.18.0-cp36-cp36m-win_amd64.whl", hash = "sha256:bfa6c8549fa18e6497a738b7033c49f94a8e2e30c5fbe2d14d0b5aa8bbc1695d"}, - {file = "zstandard-0.18.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e02043297c1832f2666cd2204f381bef43b10d56929e13c42c10c732c6e3b4ed"}, - {file = "zstandard-0.18.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7231543d38d2b7e02ef7cc78ef7ffd86419437e1114ff08709fe25a160e24bd6"}, - {file = "zstandard-0.18.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c86befac87445927488f5c8f205d11566f64c11519db223e9d282b945fa60dab"}, - {file = "zstandard-0.18.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:999a4e1768f219826ba3fa2064fab1c86dd72fdd47a42536235478c3bb3ca3e2"}, - {file = "zstandard-0.18.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9df59cd1cf3c62075ee2a4da767089d19d874ac3ad42b04a71a167e91b384722"}, - {file = "zstandard-0.18.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1be31e9e3f7607ee0cdd60915410a5968b205d3e7aa83b7fcf3dd76dbbdb39e0"}, - {file = "zstandard-0.18.0-cp37-cp37m-win32.whl", hash = "sha256:490d11b705b8ae9dc845431bacc8dd1cef2408aede176620a5cd0cd411027936"}, - {file = "zstandard-0.18.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:266aba27fa9cc5e9091d3d325ebab1fa260f64e83e42516d5e73947c70216a5b"}, - {file = "zstandard-0.18.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b2260c4e07dd0723eadb586de7718b61acca4083a490dda69c5719d79bc715c"}, - {file = "zstandard-0.18.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3af8c2383d02feb6650e9255491ec7d0824f6e6dd2bbe3e521c469c985f31fb1"}, - {file = "zstandard-0.18.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28723a1d2e4df778573b76b321ebe9f3469ac98988104c2af116dd344802c3f8"}, - {file = "zstandard-0.18.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19cac7108ff2c342317fad6dc97604b47a41f403c8f19d0bfc396dfadc3638b8"}, - {file = "zstandard-0.18.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:76725d1ee83a8915100a310bbad5d9c1fc6397410259c94033b8318d548d9990"}, - {file = "zstandard-0.18.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d716a7694ce1fa60b20bc10f35c4a22be446ef7f514c8dbc8f858b61976de2fb"}, - {file = "zstandard-0.18.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:49685bf9a55d1ab34bd8423ea22db836ba43a181ac6b045ac4272093d5cb874e"}, - {file = "zstandard-0.18.0-cp38-cp38-win32.whl", hash = "sha256:1af1268a7dc870eb27515fb8db1f3e6c5a555d2b7bcc476fc3bab8886c7265ab"}, - {file = "zstandard-0.18.0-cp38-cp38-win_amd64.whl", hash = "sha256:1dc2d3809e763055a1a6c1a73f2b677320cc9a5aa1a7c6cfb35aee59bddc42d9"}, - {file = "zstandard-0.18.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eea18c1e7442f2aa9aff1bb84550dbb6a1f711faf6e48e7319de8f2b2e923c2a"}, - {file = "zstandard-0.18.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8677ffc6a6096cccbd892e558471c901fd821aba12b7fbc63833c7346f549224"}, - {file = "zstandard-0.18.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:083dc08abf03807af9beeb2b6a91c23ad78add2499f828176a3c7b742c44df02"}, - {file = "zstandard-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c990063664c08169c84474acecc9251ee035871589025cac47c060ff4ec4bc1a"}, - {file = "zstandard-0.18.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:533db8a6fac6248b2cb2c935e7b92f994efbdeb72e1ffa0b354432e087bb5a3e"}, - {file = "zstandard-0.18.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbb3cb8a082d62b8a73af42291569d266b05605e017a3d8a06a0e5c30b5f10f0"}, - {file = "zstandard-0.18.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d6c85ca5162049ede475b7ec98e87f9390501d44a3d6776ddd504e872464ec25"}, - {file = "zstandard-0.18.0-cp39-cp39-win32.whl", hash = "sha256:75479e7c2b3eebf402c59fbe57d21bc400cefa145ca356ee053b0a08908c5784"}, - {file = "zstandard-0.18.0-cp39-cp39-win_amd64.whl", hash = "sha256:d85bfabad444812133a92fc6fbe463e1d07581dba72f041f07a360e63808b23c"}, - {file = "zstandard-0.18.0.tar.gz", hash = "sha256:0ac0357a0d985b4ff31a854744040d7b5754385d1f98f7145c30e02c6865cb6f"}, -] +zipp = [] +zstandard = [] diff --git a/python/pyiceberg/catalog/base.py b/python/pyiceberg/catalog/base.py index 6a2dcf9e029d..d70fd7b57c70 100644 --- a/python/pyiceberg/catalog/base.py +++ b/python/pyiceberg/catalog/base.py @@ -18,11 +18,20 @@ from __future__ import annotations from abc import ABC, abstractmethod +from dataclasses import dataclass from pyiceberg.catalog import Identifier, Properties from pyiceberg.schema import Schema from pyiceberg.table.base import Table -from pyiceberg.table.partitioning import PartitionSpec +from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder + + +@dataclass +class PropertiesUpdateSummary: + removed: 
list[str] + updated: list[str] + missing: list[str] class Catalog(ABC): @@ -57,7 +66,8 @@ def create_table( identifier: str | Identifier, schema: Schema, location: str | None = None, - partition_spec: PartitionSpec | None = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, properties: Properties | None = None, ) -> Table: """Create a table @@ -66,7 +76,8 @@ def create_table( identifier: Table identifier. schema: Table's schema. location: Location for the table. Optional Argument. - partition_spec: PartitionSpec for the table. Optional Argument. + partition_spec: PartitionSpec for the table. + sort_order: SortOrder for the table. properties: Table properties that can be a string based dictionary. Optional Argument. Returns: @@ -195,7 +206,7 @@ def load_namespace_properties(self, namespace: str | Identifier) -> Properties: @abstractmethod def update_namespace_properties( self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties | None = None - ) -> None: + ) -> PropertiesUpdateSummary: """Removes provided property keys and updates properties for a namespace. Args: diff --git a/python/pyiceberg/catalog/rest.py b/python/pyiceberg/catalog/rest.py new file mode 100644 index 000000000000..fc228295c9af --- /dev/null +++ b/python/pyiceberg/catalog/rest.py @@ -0,0 +1,434 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from json import JSONDecodeError +from typing import ( + Dict, + List, + Optional, + Set, + Tuple, + Type, + Union, +) + +import requests +from pydantic import Field +from requests import HTTPError + +from pyiceberg import __version__ +from pyiceberg.catalog import Identifier, Properties +from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary +from pyiceberg.exceptions import ( + AlreadyExistsError, + AuthorizationExpiredError, + BadCredentialsError, + BadRequestError, + ForbiddenError, + NoSuchNamespaceError, + NoSuchTableError, + RESTError, + ServerError, + ServiceUnavailableError, + TableAlreadyExistsError, + UnauthorizedError, +) +from pyiceberg.schema import Schema +from pyiceberg.table.base import Table +from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2 +from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder +from pyiceberg.utils.iceberg_base_model import IcebergBaseModel + + +class Endpoints: + get_config: str = "config" + list_namespaces: str = "namespaces" + create_namespace: str = "namespaces" + load_namespace_metadata: str = "namespaces/{namespace}" + drop_namespace: str = "namespaces/{namespace}" + update_properties: str = "namespaces/{namespace}/properties" + list_tables: str = "namespaces/{namespace}/tables" + create_table: str = "namespaces/{namespace}/tables" + load_table: str = "namespaces/{namespace}/tables/{table}" + update_table: str = "namespaces/{namespace}/tables/{table}" + drop_table: str = 
"namespaces/{namespace}/tables/{table}?purgeRequested={purge}" + table_exists: str = "namespaces/{namespace}/tables/{table}" + get_token: str = "oauth/tokens" + rename_table: str = "tables/rename" + + +AUTHORIZATION_HEADER = "Authorization" +BEARER_PREFIX = "Bearer" +CATALOG_SCOPE = "catalog" +CLIENT_ID = "client_id" +PREFIX = "prefix" +CLIENT_SECRET = "client_secret" +CLIENT_CREDENTIALS = "client_credentials" +CREDENTIAL = "credential" +GRANT_TYPE = "grant_type" +SCOPE = "scope" +TOKEN_EXCHANGE = "urn:ietf:params:oauth:grant-type:token-exchange" + +NAMESPACE_SEPARATOR = b"\x1F".decode("UTF-8") + + +class TableResponse(IcebergBaseModel): + metadata_location: Optional[str] = Field(alias="metadata-location", default=None) + metadata: Union[TableMetadataV1, TableMetadataV2] = Field() + config: Properties = Field(default_factory=dict) + + +class CreateTableRequest(IcebergBaseModel): + name: str = Field() + location: Optional[str] = Field() + table_schema: Schema = Field(alias="schema") + partition_spec: Optional[PartitionSpec] = Field(alias="partition-spec") + write_order: Optional[SortOrder] = Field(alias="write-order") + stage_create: bool = Field(alias="stage-create", default=False) + properties: Properties = Field(default_factory=dict) + + +class TokenResponse(IcebergBaseModel): + access_token: str = Field() + token_type: str = Field() + expires_in: int = Field() + issued_token_type: str = Field() + + +class ConfigResponse(IcebergBaseModel): + defaults: Properties = Field() + overrides: Properties = Field() + + +class ListNamespaceResponse(IcebergBaseModel): + namespaces: List[Identifier] = Field() + + +class NamespaceResponse(IcebergBaseModel): + namespace: Identifier = Field() + properties: Properties = Field() + + +class UpdateNamespacePropertiesResponse(IcebergBaseModel): + removed: List[str] = Field() + updated: List[str] = Field() + missing: List[str] = Field() + + +class ListTableResponseEntry(IcebergBaseModel): + name: str = Field() + namespace: Identifier 
= Field() + + +class ListTablesResponse(IcebergBaseModel): + identifiers: List[ListTableResponseEntry] = Field() + + +class ErrorResponseMessage(IcebergBaseModel): + message: str = Field() + type: str = Field() + code: int = Field() + + +class ErrorResponse(IcebergBaseModel): + error: ErrorResponseMessage = Field() + + +class RestCatalog(Catalog): + token: str + config: Properties + + host: str + + def __init__( + self, + name: str, + properties: Properties, + host: str, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + token: Optional[str] = None, + ): + """Rest Catalog + + You either need to provide a client_id and client_secret, or an already valid token. + + Args: + name: Name to identify the catalog + properties: Properties that are passed along to the configuration + host: The base-url of the REST Catalog endpoint + client_id: The id to identify the client + client_secret: The secret for the client + token: The bearer token + """ + self.host = host + if client_id and client_secret: + self.token = self._fetch_access_token(client_id, client_secret) + elif token: + self.token = token + else: + raise ValueError("Either set the client_id and client_secret, or provide a valid token") + self.config = self._fetch_config(properties) + super().__init__(name, properties) + + @staticmethod + def _split_credential(token: str) -> Tuple[str, str]: + """Splits the token in a client id and secret + + Args: + token: The token with a semicolon as a separator + + Returns: + The client id and secret + """ + client, secret = token.split(":") + return client, secret + + @property + def headers(self) -> Properties: + return { + AUTHORIZATION_HEADER: f"{BEARER_PREFIX} {self.token}", + "Content-type": "application/json", + "X-Client-Version": __version__, + } + + def url(self, endpoint: str, prefixed: bool = True, **kwargs) -> str: + """Constructs the endpoint + + Args: + prefixed: If the prefix return by the config needs to be appended + + Returns: + The 
base url of the rest catalog + """ + + url = self.host + url = url + "v1/" if url.endswith("/") else url + "/v1/" + + if prefixed: + url += self.config.get(PREFIX, "") + url = url if url.endswith("/") else url + "/" + + return url + endpoint.format(**kwargs) + + def _fetch_access_token(self, client_id: str, client_secret: str) -> str: + data = {GRANT_TYPE: CLIENT_CREDENTIALS, CLIENT_ID: client_id, CLIENT_SECRET: client_secret, SCOPE: CATALOG_SCOPE} + url = self.url(Endpoints.get_token, prefixed=False) + # Uses application/x-www-form-urlencoded by default + response = requests.post(url=url, data=data) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {401: BadCredentialsError}) + + return TokenResponse(**response.json()).access_token + + def _fetch_config(self, properties: Properties) -> Properties: + response = requests.get(self.url(Endpoints.get_config, prefixed=False), headers=self.headers) + response.raise_for_status() + config_response = ConfigResponse(**response.json()) + config = config_response.defaults + config.update(properties) + config.update(config_response.overrides) + return config + + def _split_identifier_for_path(self, identifier: Union[str, Identifier]) -> Properties: + identifier = self.identifier_to_tuple(identifier) + return {"namespace": NAMESPACE_SEPARATOR.join(identifier[:-1]), "table": identifier[-1]} + + def _split_identifier_for_json(self, identifier: Union[str, Identifier]) -> Dict[str, Union[Identifier, str]]: + identifier = self.identifier_to_tuple(identifier) + return {"namespace": identifier[:-1], "name": identifier[-1]} + + def _handle_non_200_response(self, exc: HTTPError, error_handler: Dict[int, Type[Exception]]): + try: + response = ErrorResponse(**exc.response.json()) + except JSONDecodeError: + # In the case we don't have a proper response + response = ErrorResponse( + error=ErrorResponseMessage( + message=f"Could not decode json payload: {exc.response.text}", + 
type="RESTError", + code=exc.response.status_code, + ) + ) + + code = exc.response.status_code + if code in error_handler: + raise error_handler[code](response.error.message) from exc + elif code == 400: + raise BadRequestError(response.error.message) from exc + elif code == 401: + raise UnauthorizedError(response.error.message) from exc + elif code == 403: + raise ForbiddenError(response.error.message) from exc + elif code == 422: + raise RESTError(response.error.message) from exc + elif code == 419: + raise AuthorizationExpiredError(response.error.message) + elif code == 501: + raise NotImplementedError(response.error.message) + elif code == 503: + raise ServiceUnavailableError(response.error.message) from exc + elif 500 <= code < 600: + raise ServerError(response.error.message) from exc + else: + raise RESTError(response.error.message) from exc + + def create_table( + self, + identifier: Union[str, Identifier], + schema: Schema, + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Optional[Properties] = None, + ) -> Table: + namespace_and_table = self._split_identifier_for_path(identifier) + properties = properties or {} + request = CreateTableRequest( + name=namespace_and_table["table"], + location=location, + table_schema=schema, + partition_spec=partition_spec, + write_order=sort_order, + properties=properties, + ) + serialized_json = request.json() + response = requests.post( + self.url(Endpoints.create_table, namespace=namespace_and_table["namespace"]), + data=serialized_json, + headers=self.headers, + ) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {409: TableAlreadyExistsError}) + + table_response = TableResponse(**response.json()) + + return Table( + identifier=(self.name,) + self.identifier_to_tuple(identifier), + metadata_location=table_response.metadata_location, + 
metadata=table_response.metadata, + ) + + def list_tables(self, namespace: Optional[Union[str, Identifier]] = None) -> List[Identifier]: + namespace_concat = NAMESPACE_SEPARATOR.join(self.identifier_to_tuple(namespace or "")) + response = requests.get( + self.url(Endpoints.list_tables, namespace=namespace_concat), + headers=self.headers, + ) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) + return [(*table.namespace, table.name) for table in ListTablesResponse(**response.json()).identifiers] + + def load_table(self, identifier: Union[str, Identifier]) -> Table: + response = requests.get( + self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)), headers=self.headers + ) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchTableError}) + + table_response = TableResponse(**response.json()) + + return Table( + identifier=(self.name,) + self.identifier_to_tuple(identifier), + metadata_location=table_response.metadata_location, + metadata=table_response.metadata, + ) + + def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = False) -> None: + response = requests.delete( + self.url(Endpoints.drop_table, prefixed=True, purge=purge_requested, **self._split_identifier_for_path(identifier)), + headers=self.headers, + ) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchTableError}) + + def purge_table(self, identifier: Union[str, Identifier]) -> None: + self.drop_table(identifier=identifier, purge_requested=True) + + def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]): + payload = { + "source": self._split_identifier_for_json(from_identifier), + "destination": self._split_identifier_for_json(to_identifier), + } + response = 
requests.post(self.url(Endpoints.rename_table), json=payload, headers=self.headers) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchTableError, 409: TableAlreadyExistsError}) + + def create_namespace(self, namespace: Union[str, Identifier], properties: Optional[Properties] = None) -> None: + payload = {"namespace": self.identifier_to_tuple(namespace), "properties": properties or {}} + response = requests.post(self.url(Endpoints.create_namespace), json=payload, headers=self.headers) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchNamespaceError, 409: AlreadyExistsError}) + + def drop_namespace(self, namespace: Union[str, Identifier]) -> None: + namespace = NAMESPACE_SEPARATOR.join(self.identifier_to_tuple(namespace)) + response = requests.delete(self.url(Endpoints.drop_namespace, namespace=namespace), headers=self.headers) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) + + def list_namespaces(self) -> List[Identifier]: + response = requests.get(self.url(Endpoints.list_namespaces), headers=self.headers) + response.raise_for_status() + namespaces = ListNamespaceResponse(**response.json()) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {}) + return namespaces.namespaces + + def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: + namespace = NAMESPACE_SEPARATOR.join(self.identifier_to_tuple(namespace)) + response = requests.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace), headers=self.headers) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) + + return NamespaceResponse(**response.json()).properties + + def update_namespace_properties( + self, namespace: Union[str, 
Identifier], removals: Optional[Set[str]] = None, updates: Optional[Properties] = None + ) -> PropertiesUpdateSummary: + namespace = NAMESPACE_SEPARATOR.join(self.identifier_to_tuple(namespace)) + payload = {"removals": list(removals or []), "updates": updates} + response = requests.post(self.url(Endpoints.update_properties, namespace=namespace), json=payload, headers=self.headers) + try: + response.raise_for_status() + except HTTPError as exc: + self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) + parsed_response = UpdateNamespacePropertiesResponse(**response.json()) + return PropertiesUpdateSummary( + removed=parsed_response.removed, + updated=parsed_response.updated, + missing=parsed_response.missing, + ) diff --git a/python/pyiceberg/exceptions.py b/python/pyiceberg/exceptions.py index f9ac3333b109..f38cd0df6708 100644 --- a/python/pyiceberg/exceptions.py +++ b/python/pyiceberg/exceptions.py @@ -16,12 +16,8 @@ # under the License. -class NoSuchTableError(Exception): - """Raised when a referenced table is not found""" - - -class NoSuchNamespaceError(Exception): - """Raised when a referenced name-space is not found""" +class TableAlreadyExistsError(Exception): + """Raised when creating a table with a name that already exists""" class NamespaceNotEmptyError(Exception): @@ -34,3 +30,43 @@ class AlreadyExistsError(Exception): class ValidationError(Exception): """Raises when there is an issue with the schema""" + + +class RESTError(Exception): + """Raises when there is an unknown response from the REST Catalog""" + + +class BadCredentialsError(RESTError): + """Raises when providing invalid credentials""" + + +class BadRequestError(RESTError): + """Raises when an invalid request is being made""" + + +class UnauthorizedError(RESTError): + """Raises when you don't have the proper authorization""" + + +class ServiceUnavailableError(RESTError): + """Raises when the service doesn't respond""" + + +class ServerError(RESTError): + """Raises when there is an 
unhandled exception on the server side""" + + +class ForbiddenError(RESTError): + """Raises when you don't have the credentials to perform the action on the REST catalog""" + + +class AuthorizationExpiredError(RESTError): + """When the credentials are expired when performing an action on the REST catalog""" + + +class NoSuchTableError(RESTError): + """Raises when the table can't be found in the REST catalog""" + + +class NoSuchNamespaceError(RESTError): + """Raised when a referenced name-space is not found""" diff --git a/python/pyiceberg/schema.py b/python/pyiceberg/schema.py index ea4f4cf6c055..5e5d4af56ddc 100644 --- a/python/pyiceberg/schema.py +++ b/python/pyiceberg/schema.py @@ -24,6 +24,7 @@ Dict, Generic, List, + Literal, Optional, Tuple, TypeVar, @@ -54,6 +55,7 @@ class Schema(IcebergBaseModel): >>> from pyiceberg import types """ + type: Literal["struct"] = "struct" fields: Tuple[NestedField, ...] = Field(default_factory=tuple) schema_id: int = Field(alias="schema-id") identifier_field_ids: List[int] = Field(alias="identifier-field-ids", default_factory=list) diff --git a/python/pyiceberg/table/base.py b/python/pyiceberg/table/base.py index 77821547e7c9..5e7c06a5b2d1 100644 --- a/python/pyiceberg/table/base.py +++ b/python/pyiceberg/table/base.py @@ -15,17 +15,16 @@ # specific language governing permissions and limitations # under the License. -from __future__ import annotations +from typing import Optional, Union -from abc import ABC +from pydantic import Field from pyiceberg.catalog.base import Identifier +from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2 +from pyiceberg.utils.iceberg_base_model import IcebergBaseModel -class Table(ABC): - """Placeholder for Table managed by the Catalog that points to the current Table Metadata. 
- - To be implemented by https://github.com/apache/iceberg/issues/3227 - """ - - identifier: str | Identifier +class Table(IcebergBaseModel): + identifier: Identifier = Field() + metadata_location: Optional[str] = Field() + metadata: Union[TableMetadataV1, TableMetadataV2] = Field() diff --git a/python/pyiceberg/table/metadata.py b/python/pyiceberg/table/metadata.py index 6543d4ce30e6..9608bf138bcc 100644 --- a/python/pyiceberg/table/metadata.py +++ b/python/pyiceberg/table/metadata.py @@ -38,6 +38,7 @@ INITIAL_SEQUENCE_NUMBER = 0 INITIAL_SPEC_ID = 0 DEFAULT_SCHEMA_ID = 0 +DEFAULT_LAST_PARTITION_ID = 1000 def check_schemas(values: Dict[str, Any]) -> Dict[str, Any]: @@ -103,7 +104,7 @@ def construct_refs(cls, data: Dict[str, Any]): """The table’s base location. This is used by writers to determine where to store data files, manifest files, and table metadata files.""" - table_uuid: Optional[UUID] = Field(alias="table-uuid") + table_uuid: Optional[UUID] = Field(alias="table-uuid", default_factory=uuid4) """A UUID that identifies the table, generated when the table is created. 
Implementations must throw an exception if a table’s UUID does not match the expected UUID after refreshing metadata.""" @@ -210,12 +211,10 @@ def set_v2_compatible_defaults(cls, data: Dict[str, Any]) -> Dict[str, Any]: Returns: The TableMetadata with the defaults applied """ - if "schema-id" not in data["schema"]: + if data.get("schema") and "schema-id" not in data["schema"]: data["schema"]["schema-id"] = DEFAULT_SCHEMA_ID - if "last-partition-id" not in data: + if data.get("partition-spec") and "last-partition-id" not in data: data["last-partition-id"] = max(spec["field-id"] for spec in data["partition-spec"]) - if "table-uuid" not in data: - data["table-uuid"] = uuid4() return data @root_validator(skip_on_failure=True) @@ -236,7 +235,7 @@ def construct_schemas(cls, data: Dict[str, Any]) -> Dict[str, Any]: schema = data["schema_"] data["schemas"] = [schema] else: - check_schemas(data["schemas"]) + check_schemas(data) return data @root_validator(skip_on_failure=True) @@ -257,7 +256,7 @@ def construct_partition_specs(cls, data: Dict[str, Any]) -> Dict[str, Any]: fields = data["partition_spec"] data["partition_specs"] = [PartitionSpec(spec_id=INITIAL_SPEC_ID, fields=fields)] else: - check_partition_specs(data["partition_specs"]) + check_partition_specs(data) return data @root_validator(skip_on_failure=True) @@ -273,10 +272,10 @@ def set_sort_orders(cls, data: Dict[str, Any]): Returns: The TableMetadata with the sort_orders set, if not provided """ - if sort_orders := data.get("sort_orders"): - check_sort_orders(sort_orders) - else: + if not data.get("sort_orders"): data["sort_orders"] = [UNSORTED_SORT_ORDER] + else: + check_sort_orders(data) return data def to_v2(self) -> "TableMetadataV2": diff --git a/python/pyiceberg/table/partitioning.py b/python/pyiceberg/table/partitioning.py index ef080bc307bc..f5222ad595ec 100644 --- a/python/pyiceberg/table/partitioning.py +++ b/python/pyiceberg/table/partitioning.py @@ -81,7 +81,7 @@ class PartitionSpec(IcebergBaseModel): 
""" spec_id: int = Field(alias="spec-id") - fields: Tuple[PartitionField, ...] = Field() + fields: Tuple[PartitionField, ...] = Field(default_factory=tuple) def __init__( self, @@ -154,3 +154,6 @@ def compatible_with(self, other: "PartitionSpec") -> bool: and this_field.name == that_field.name for this_field, that_field in zip(self.fields, other.fields) ) + + +UNPARTITIONED_PARTITION_SPEC = PartitionSpec(spec_id=0) diff --git a/python/pyiceberg/utils/iceberg_base_model.py b/python/pyiceberg/utils/iceberg_base_model.py index e218f9f89b5f..e3ad2eab0f38 100644 --- a/python/pyiceberg/utils/iceberg_base_model.py +++ b/python/pyiceberg/utils/iceberg_base_model.py @@ -37,6 +37,7 @@ class IcebergBaseModel(BaseModel): class Config: keep_untouched = (cached_property,) allow_population_by_field_name = True + copy_on_model_validation = False frozen = True def dict(self, exclude_none: bool = True, **kwargs): diff --git a/python/pyproject.toml b/python/pyproject.toml index 24cae0c813bb..55011bcaaa72 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -40,6 +40,8 @@ packages = [ [tool.poetry.dependencies] python = "^3.8" mmh3 = "^3.0.0" +requests = "^2.28.1" + pydantic = "^1.9.1" pyarrow = { version = "^8.0.0", optional = true } @@ -54,6 +56,7 @@ pytest-checkdocs = "^2.0.0" pre-commit = "^2.0.0" fastavro = "^1.5.1" coverage = { version = "^6.0.0", extras = ["toml"] } +requests-mock = "^1.9.3" [build-system] requires = ["poetry-core>=1.0.0"] @@ -111,5 +114,9 @@ ignore_missing_imports = true module = "mmh3.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "requests_mock.*" +ignore_missing_imports = true + [tool.coverage.run] source = ['pyiceberg/'] diff --git a/python/tests/catalog/test_base.py b/python/tests/catalog/test_base.py index e4dd4d399248..49790ecbf3b1 100644 --- a/python/tests/catalog/test_base.py +++ b/python/tests/catalog/test_base.py @@ -26,7 +26,7 @@ import pytest from pyiceberg.catalog import Identifier, Properties -from 
pyiceberg.catalog.base import Catalog +from pyiceberg.catalog.base import Catalog, PropertiesUpdateSummary from pyiceberg.exceptions import ( AlreadyExistsError, NamespaceNotEmptyError, @@ -36,7 +36,9 @@ from pyiceberg.schema import Schema from pyiceberg.table.base import Table from pyiceberg.table.metadata import INITIAL_SPEC_ID -from pyiceberg.table.partitioning import PartitionSpec +from pyiceberg.table.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder +from tests.table.test_metadata import EXAMPLE_TABLE_METADATA_V1 class InMemoryCatalog(Catalog): @@ -55,7 +57,8 @@ def create_table( identifier: Union[str, Identifier], schema: Schema, location: Optional[str] = None, - partition_spec: Optional[PartitionSpec] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, properties: Optional[Properties] = None, ) -> Table: @@ -68,8 +71,7 @@ def create_table( if namespace not in self.__namespaces: self.__namespaces[namespace] = {} - table = Table() - table.identifier = identifier + table = Table(identifier=identifier, metadata=EXAMPLE_TABLE_METADATA_V1) self.__tables[identifier] = table return table @@ -102,9 +104,8 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U if to_namespace not in self.__namespaces: self.__namespaces[to_namespace] = {} - table.identifier = to_identifier - self.__tables[to_identifier] = table - return table + self.__tables[to_identifier] = Table(identifier=to_identifier, metadata=table.metadata) + return self.__tables[to_identifier] def create_namespace(self, namespace: Union[str, Identifier], properties: Optional[Properties] = None) -> None: namespace = Catalog.identifier_to_tuple(namespace) @@ -143,18 +144,30 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper def update_namespace_properties( self, namespace: Union[str, Identifier], 
removals: Optional[Set[str]] = None, updates: Optional[Properties] = None - ) -> None: + ) -> PropertiesUpdateSummary: + removed: Set[str] = set() + updated: Set[str] = set() + namespace = Catalog.identifier_to_tuple(namespace) if namespace in self.__namespaces: if removals: for key in removals: if key in self.__namespaces[namespace]: del self.__namespaces[namespace][key] + removed.add(key) if updates: - self.__namespaces[namespace].update(updates) + for key, value in updates.items(): + self.__namespaces[namespace][key] = value + updated.add(key) else: raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") + expected_to_change = removed.difference(removals or set()) + + return PropertiesUpdateSummary( + removed=list(removed or []), updated=list(updates.keys() if updates else []), missing=list(expected_to_change) + ) + TEST_TABLE_IDENTIFIER = ("com", "organization", "department", "my_table") TEST_TABLE_NAMESPACE = ("com", "organization", "department") @@ -383,11 +396,12 @@ def test_update_namespace_metadata(catalog: InMemoryCatalog): # When new_metadata = {"key3": "value3", "key4": "value4"} - catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, updates=new_metadata) + summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, updates=new_metadata) # Then assert TEST_TABLE_NAMESPACE in catalog.list_namespaces() assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items() + assert summary == PropertiesUpdateSummary(removed=[], updated=["key3", "key4"], missing=[]) def test_update_namespace_metadata_removals(catalog: InMemoryCatalog): @@ -397,12 +411,13 @@ def test_update_namespace_metadata_removals(catalog: InMemoryCatalog): # When new_metadata = {"key3": "value3", "key4": "value4"} remove_metadata = {"key1"} - catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, remove_metadata, new_metadata) + summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, remove_metadata, new_metadata) # Then 
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces() assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items() assert remove_metadata.isdisjoint(catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).keys()) + assert summary == PropertiesUpdateSummary(removed=["key1"], updated=["key3", "key4"], missing=[]) def test_update_namespace_metadata_raises_error_when_namespace_does_not_exist(catalog: InMemoryCatalog): diff --git a/python/tests/catalog/test_rest.py b/python/tests/catalog/test_rest.py new file mode 100644 index 000000000000..d5ccfe5b58aa --- /dev/null +++ b/python/tests/catalog/test_rest.py @@ -0,0 +1,612 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# pylint: disable=redefined-outer-name
"""Unit tests for the REST catalog.

Every test stubs the REST server with ``requests-mock``: the ``rest_mock``
fixture pre-registers the ``v1/config`` endpoint (fetched when the catalog is
constructed) and each test registers the endpoint(s) it exercises, for both
the success and the error (4xx) paths.
"""
from uuid import UUID

import pytest
from requests_mock import Mocker

from pyiceberg.catalog.base import PropertiesUpdateSummary, Table
from pyiceberg.catalog.rest import RestCatalog
from pyiceberg.exceptions import (
    AlreadyExistsError,
    BadCredentialsError,
    NoSuchNamespaceError,
    NoSuchTableError,
    TableAlreadyExistsError,
)
from pyiceberg.schema import Schema
from pyiceberg.table.metadata import TableMetadataV1
from pyiceberg.table.partitioning import PartitionField, PartitionSpec
from pyiceberg.table.refs import SnapshotRef, SnapshotRefType
from pyiceberg.table.snapshots import Operation, Snapshot, Summary
from pyiceberg.table.sorting import SortField, SortOrder
from pyiceberg.transforms import IdentityTransform, TruncateTransform
from pyiceberg.types import (
    BooleanType,
    IntegerType,
    NestedField,
    StringType,
)

TEST_HOST = "https://iceberg-test-catalog/"
TEST_CLIENT_ID = "client"
TEST_CLIENT_SECRET = "secret"
TEST_TOKEN = "some_jwt_token"


@pytest.fixture
def rest_mock(requests_mock: Mocker):
    """Takes the default requests_mock and adds the config endpoint to it

    This endpoint is called when initializing the rest catalog
    """
    requests_mock.get(
        f"{TEST_HOST}v1/config",
        json={"defaults": {}, "overrides": {}},
        status_code=200,
    )
    return requests_mock


def test_token_200(rest_mock: Mocker):
    """A successful client-credentials exchange exposes the access token."""
    rest_mock.post(
        f"{TEST_HOST}v1/oauth/tokens",
        json={
            "access_token": TEST_TOKEN,
            "token_type": "Bearer",
            "expires_in": 86400,
            "issued_token_type": "urn:ietf:params:oauth:token-type:access_token",
        },
        status_code=200,
    )
    assert RestCatalog("rest", {}, TEST_HOST, TEST_CLIENT_ID, TEST_CLIENT_SECRET).token == TEST_TOKEN


def test_token_401(rest_mock: Mocker):
    """A 401 from the token endpoint surfaces as BadCredentialsError."""
    message = "Invalid client ID: abc"
    rest_mock.post(
        f"{TEST_HOST}v1/oauth/tokens",
        json={
            "error": {
                "message": message,
                "type": "BadCredentialsException",
                "code": 401,
            }
        },
        status_code=401,
    )

    with pytest.raises(BadCredentialsError) as e:
        RestCatalog("rest", {}, TEST_HOST, client_id=TEST_CLIENT_ID, client_secret=TEST_CLIENT_SECRET)
    assert message in str(e.value)


def test_list_tables_200(rest_mock: Mocker):
    namespace = "examples"
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces/{namespace}/tables",
        json={"identifiers": [{"namespace": ["examples"], "name": "fooshare"}]},
        status_code=200,
    )

    assert RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).list_tables(namespace) == [("examples", "fooshare")]


def test_list_tables_404(rest_mock: Mocker):
    namespace = "examples"
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces/{namespace}/tables",
        json={
            "error": {
                "message": "Namespace does not exist: personal in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceException",
                "code": 404,
            }
        },
        status_code=404,
    )
    with pytest.raises(NoSuchNamespaceError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).list_tables(namespace)
    assert "Namespace does not exist" in str(e.value)


def test_list_namespaces_200(rest_mock: Mocker):
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces",
        json={"namespaces": [["default"], ["examples"], ["fokko"], ["system"]]},
        status_code=200,
    )
    assert RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).list_namespaces() == [
        ("default",),
        ("examples",),
        ("fokko",),
        ("system",),
    ]


def test_create_namespace_200(rest_mock: Mocker):
    namespace = "leden"
    rest_mock.post(
        f"{TEST_HOST}v1/namespaces",
        json={"namespace": [namespace], "properties": {}},
        status_code=200,
    )
    RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).create_namespace(namespace)


def test_create_namespace_409(rest_mock: Mocker):
    namespace = "examples"
    rest_mock.post(
        f"{TEST_HOST}v1/namespaces",
        json={
            "error": {
                "message": "Namespace already exists: fokko in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "AlreadyExistsException",
                "code": 409,
            }
        },
        status_code=409,
    )
    with pytest.raises(AlreadyExistsError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).create_namespace(namespace)
    assert "Namespace already exists" in str(e.value)


def test_drop_namespace_404(rest_mock: Mocker):
    namespace = "examples"
    rest_mock.delete(
        f"{TEST_HOST}v1/namespaces/{namespace}",
        json={
            "error": {
                "message": "Namespace does not exist: leden in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceException",
                "code": 404,
            }
        },
        status_code=404,
    )
    with pytest.raises(NoSuchNamespaceError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).drop_namespace(namespace)
    assert "Namespace does not exist" in str(e.value)


def test_load_namespace_properties_200(rest_mock: Mocker):
    namespace = "leden"
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces/{namespace}",
        json={"namespace": ["fokko"], "properties": {"prop": "yes"}},
        # 200 OK: the test name and the JSON body imply a normal response
        # (a 204 No Content must not carry a body).
        status_code=200,
    )
    assert RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).load_namespace_properties(namespace) == {"prop": "yes"}


def test_load_namespace_properties_404(rest_mock: Mocker):
    namespace = "leden"
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces/{namespace}",
        json={
            "error": {
                "message": "Namespace does not exist: fokko22 in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceException",
                "code": 404,
            }
        },
        status_code=404,
    )
    with pytest.raises(NoSuchNamespaceError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).load_namespace_properties(namespace)
    assert "Namespace does not exist" in str(e.value)


def test_update_namespace_properties_200(rest_mock: Mocker):
    rest_mock.post(
        f"{TEST_HOST}v1/namespaces/fokko/properties",
        json={"removed": [], "updated": ["prop"], "missing": ["abc"]},
        status_code=200,
    )
    response = RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).update_namespace_properties(
        ("fokko",), {"abc"}, {"prop": "yes"}
    )

    assert response == PropertiesUpdateSummary(removed=[], updated=["prop"], missing=["abc"])


def test_update_namespace_properties_404(rest_mock: Mocker):
    rest_mock.post(
        f"{TEST_HOST}v1/namespaces/fokko/properties",
        json={
            "error": {
                "message": "Namespace does not exist: does_not_exists in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceException",
                "code": 404,
            }
        },
        status_code=404,
    )
    with pytest.raises(NoSuchNamespaceError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).update_namespace_properties(("fokko",), {"abc"}, {"prop": "yes"})
    assert "Namespace does not exist" in str(e.value)


def test_load_table_200(rest_mock: Mocker):
    """A LoadTableResponse is parsed into a full Table with V1 metadata."""
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces/fokko/tables/table",
        json={
            "metadataLocation": "s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f/metadata/00001-5f2f8166-244c-4eae-ac36-384ecdec81fc.gz.metadata.json",
            "metadata": {
                "format-version": 1,
                "table-uuid": "b55d9dda-6561-423a-8bfc-787980ce421f",
                "location": "s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f",
                "last-updated-ms": 1646787054459,
                "last-column-id": 2,
                "schema": {
                    "type": "struct",
                    "schema-id": 0,
                    "fields": [
                        {"id": 1, "name": "id", "required": False, "type": "int"},
                        {"id": 2, "name": "data", "required": False, "type": "string"},
                    ],
                },
                "current-schema-id": 0,
                "schemas": [
                    {
                        "type": "struct",
                        "schema-id": 0,
                        "fields": [
                            {"id": 1, "name": "id", "required": False, "type": "int"},
                            {"id": 2, "name": "data", "required": False, "type": "string"},
                        ],
                    }
                ],
                "partition-spec": [],
                "default-spec-id": 0,
                "partition-specs": [{"spec-id": 0, "fields": []}],
                "last-partition-id": 999,
                "default-sort-order-id": 0,
                "sort-orders": [{"order-id": 0, "fields": []}],
                "properties": {"owner": "bryan", "write.metadata.compression-codec": "gzip"},
                "current-snapshot-id": 3497810964824022504,
                "refs": {"main": {"snapshot-id": 3497810964824022504, "type": "branch"}},
                "snapshots": [
                    {
                        "snapshot-id": 3497810964824022504,
                        "timestamp-ms": 1646787054459,
                        "summary": {
                            "operation": "append",
                            "spark.app.id": "local-1646787004168",
                            "added-data-files": "1",
                            "added-records": "1",
                            "added-files-size": "697",
                            "changed-partition-count": "1",
                            "total-records": "1",
                            "total-files-size": "697",
                            "total-data-files": "1",
                            "total-delete-files": "0",
                            "total-position-deletes": "0",
                            "total-equality-deletes": "0",
                        },
                        "manifest-list": "s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f/metadata/snap-3497810964824022504-1-c4f68204-666b-4e50-a9df-b10c34bf6b82.avro",
                        "schema-id": 0,
                    }
                ],
                "snapshot-log": [{"timestamp-ms": 1646787054459, "snapshot-id": 3497810964824022504}],
                "metadata-log": [
                    {
                        "timestamp-ms": 1646787031514,
                        "metadata-file": "s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f/metadata/00000-88484a1c-00e5-4a07-a787-c0e7aeffa805.gz.metadata.json",
                    }
                ],
            },
            "config": {"client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory", "region": "us-west-2"},
        },
        status_code=200,
    )
    table = RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).load_table(("fokko", "table"))
    assert table == Table(
        identifier=("rest", "fokko", "table"),
        metadata_location=None,
        metadata=TableMetadataV1(
            location="s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f",
            table_uuid=UUID("b55d9dda-6561-423a-8bfc-787980ce421f"),
            last_updated_ms=1646787054459,
            last_column_id=2,
            schemas=[
                Schema(
                    NestedField(field_id=1, name="id", field_type=IntegerType(), required=False),
                    NestedField(field_id=2, name="data", field_type=StringType(), required=False),
                    schema_id=0,
                    identifier_field_ids=[],
                )
            ],
            current_schema_id=0,
            partition_specs=[PartitionSpec(spec_id=0, fields=())],
            default_spec_id=0,
            last_partition_id=999,
            properties={"owner": "bryan", "write.metadata.compression-codec": "gzip"},
            current_snapshot_id=3497810964824022504,
            snapshots=[
                Snapshot(
                    snapshot_id=3497810964824022504,
                    parent_snapshot_id=None,
                    sequence_number=None,
                    timestamp_ms=1646787054459,
                    manifest_list="s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f/metadata/snap-3497810964824022504-1-c4f68204-666b-4e50-a9df-b10c34bf6b82.avro",
                    summary=Summary(
                        operation=Operation.APPEND,
                        **{  # type: ignore
                            "spark.app.id": "local-1646787004168",
                            "added-data-files": "1",
                            "added-records": "1",
                            "added-files-size": "697",
                            "changed-partition-count": "1",
                            "total-records": "1",
                            "total-files-size": "697",
                            "total-data-files": "1",
                            "total-delete-files": "0",
                            "total-position-deletes": "0",
                            "total-equality-deletes": "0",
                        },
                    ),
                    schema_id=0,
                )
            ],
            snapshot_log=[{"timestamp-ms": 1646787054459, "snapshot-id": 3497810964824022504}],
            metadata_log=[
                {
                    "timestamp-ms": 1646787031514,
                    "metadata-file": "s3://tabular-public-us-west-2-dev/bb30733e-8769-4dab-aa1b-e76245bb2bd4/b55d9dda-6561-423a-8bfc-787980ce421f/metadata/00000-88484a1c-00e5-4a07-a787-c0e7aeffa805.gz.metadata.json",
                }
            ],
            sort_orders=[SortOrder(order_id=0)],
            default_sort_order_id=0,
            refs={
                "main": SnapshotRef(
                    snapshot_id=3497810964824022504,
                    snapshot_ref_type=SnapshotRefType.BRANCH,
                    min_snapshots_to_keep=None,
                    max_snapshot_age_ms=None,
                    max_ref_age_ms=None,
                )
            },
            format_version=1,
            schema_=Schema(
                NestedField(field_id=1, name="id", field_type=IntegerType(), required=False),
                NestedField(field_id=2, name="data", field_type=StringType(), required=False),
                schema_id=0,
                identifier_field_ids=[],
            ),
            partition_spec=[],
        ),
        config={"client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory", "region": "us-west-2"},
    )


def test_load_table_404(rest_mock: Mocker):
    rest_mock.get(
        f"{TEST_HOST}v1/namespaces/fokko/tables/does_not_exists",
        json={
            "error": {
                "message": "Table does not exist: examples.does_not_exists in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceErrorException",
                "code": 404,
            }
        },
        status_code=404,
    )

    with pytest.raises(NoSuchTableError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).load_table(("fokko", "does_not_exists"))
    assert "Table does not exist" in str(e.value)


def test_drop_table_404(rest_mock: Mocker):
    rest_mock.delete(
        f"{TEST_HOST}v1/namespaces/fokko/tables/does_not_exists",
        json={
            "error": {
                "message": "Table does not exist: fokko.does_not_exists in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceErrorException",
                "code": 404,
            }
        },
        status_code=404,
    )

    with pytest.raises(NoSuchTableError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).drop_table(("fokko", "does_not_exists"))
    assert "Table does not exist" in str(e.value)


def test_create_table_200(rest_mock: Mocker, table_schema_simple: Schema):
    """A CreateTableResponse is parsed into a full Table with V1 metadata."""
    rest_mock.post(
        f"{TEST_HOST}v1/namespaces/fokko/tables",
        json={
            "metadataLocation": None,
            "metadata": {
                "format-version": 1,
                "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
                "location": "s3://tabular-wh-us-west-2-dev/8bcb0838-50fc-472d-9ddb-8feb89ef5f1e/bf289591-dcc0-4234-ad4f-5c3eed811a29",
                "last-updated-ms": 1657810967051,
                "last-column-id": 3,
                "schema": {
                    "type": "struct",
                    "schema-id": 0,
                    "identifier-field-ids": [2],
                    "fields": [
                        {"id": 1, "name": "foo", "required": False, "type": "string"},
                        {"id": 2, "name": "bar", "required": True, "type": "int"},
                        {"id": 3, "name": "baz", "required": False, "type": "boolean"},
                    ],
                },
                "current-schema-id": 0,
                "schemas": [
                    {
                        "type": "struct",
                        "schema-id": 0,
                        "identifier-field-ids": [2],
                        "fields": [
                            {"id": 1, "name": "foo", "required": False, "type": "string"},
                            {"id": 2, "name": "bar", "required": True, "type": "int"},
                            {"id": 3, "name": "baz", "required": False, "type": "boolean"},
                        ],
                    }
                ],
                "partition-spec": [],
                "default-spec-id": 0,
                "partition-specs": [{"spec-id": 0, "fields": []}],
                "last-partition-id": 999,
                "default-sort-order-id": 0,
                "sort-orders": [{"order-id": 0, "fields": []}],
                "properties": {
                    "write.delete.parquet.compression-codec": "zstd",
                    "write.metadata.compression-codec": "gzip",
                    "write.summary.partition-limit": "100",
                    "write.parquet.compression-codec": "zstd",
                },
                "current-snapshot-id": -1,
                "refs": {},
                "snapshots": [],
                "snapshot-log": [],
                "metadata-log": [],
            },
            "config": {
                "client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory",
                "region": "us-west-2",
            },
        },
        status_code=200,
    )
    table = RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).create_table(
        identifier=("fokko", "fokko2"),
        schema=table_schema_simple,
        location=None,
        partition_spec=PartitionSpec(
            spec_id=1, fields=(PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=3), name="id"),)
        ),
        sort_order=SortOrder(1, SortField(source_id=2, transform=IdentityTransform())),
        properties={"owner": "fokko"},
    )
    assert table == Table(
        identifier=("rest", "fokko", "fokko2"),
        metadata_location=None,
        metadata=TableMetadataV1(
            location="s3://tabular-wh-us-west-2-dev/8bcb0838-50fc-472d-9ddb-8feb89ef5f1e/bf289591-dcc0-4234-ad4f-5c3eed811a29",
            table_uuid=UUID("bf289591-dcc0-4234-ad4f-5c3eed811a29"),
            last_updated_ms=1657810967051,
            last_column_id=3,
            schemas=[
                Schema(
                    NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
                    NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True),
                    NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
                    schema_id=0,
                    identifier_field_ids=[2],
                )
            ],
            current_schema_id=0,
            partition_specs=[PartitionSpec(spec_id=0, fields=())],
            default_spec_id=0,
            last_partition_id=999,
            properties={
                "write.delete.parquet.compression-codec": "zstd",
                "write.metadata.compression-codec": "gzip",
                "write.summary.partition-limit": "100",
                "write.parquet.compression-codec": "zstd",
            },
            current_snapshot_id=None,
            snapshots=[],
            snapshot_log=[],
            metadata_log=[],
            sort_orders=[SortOrder(order_id=0)],
            default_sort_order_id=0,
            refs={},
            format_version=1,
            schema_=Schema(
                NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
                NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True),
                NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
                schema_id=0,
                identifier_field_ids=[2],
            ),
            partition_spec=[],
        ),
    )


def test_create_table_409(rest_mock: Mocker, table_schema_simple: Schema):
    rest_mock.post(
        f"{TEST_HOST}v1/namespaces/fokko/tables",
        json={
            "error": {
                "message": "Table already exists: fokko.already_exists in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "AlreadyExistsException",
                "code": 409,
            }
        },
        status_code=409,
    )

    with pytest.raises(TableAlreadyExistsError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).create_table(
            identifier=("fokko", "fokko2"),
            schema=table_schema_simple,
            location=None,
            partition_spec=PartitionSpec(
                spec_id=1,
                fields=(PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=3), name="id"),),
            ),
            sort_order=SortOrder(1, SortField(source_id=2, transform=IdentityTransform())),
            properties={"owner": "fokko"},
        )
    assert "Table already exists" in str(e.value)


def test_delete_namespace_204(rest_mock: Mocker):
    namespace = "example"
    rest_mock.delete(
        f"{TEST_HOST}v1/namespaces/{namespace}",
        json={},
        status_code=204,
    )
    RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).drop_namespace(namespace)


def test_delete_table_204(rest_mock: Mocker):
    rest_mock.delete(
        f"{TEST_HOST}v1/namespaces/example/tables/fokko",
        json={},
        status_code=204,
    )
    RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).drop_table(("example", "fokko"))


def test_delete_table_404(rest_mock: Mocker):
    rest_mock.delete(
        f"{TEST_HOST}v1/namespaces/example/tables/fokko",
        json={
            "error": {
                "message": "Table does not exist: fokko.fokko2 in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e",
                "type": "NoSuchNamespaceErrorException",
                "code": 404,
            }
        },
        status_code=404,
    )
    with pytest.raises(NoSuchTableError) as e:
        RestCatalog("rest", {}, TEST_HOST, token=TEST_TOKEN).drop_table(("example", "fokko"))
    assert "Table does not exist" in str(e.value)
0, "identifier-field-ids": []}], "current-schema-id": 0, "partition-specs": [{"spec-id": 0, "fields": [{"source-id": 1, "field-id": 1000, "transform": "identity", "name": "x"}]}], "default-spec-id": 0, "last-partition-id": 1000, "properties": {}, "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}], "snapshot-log": [], "metadata-log": [], "sort-orders": [{"order-id": 0, "fields": []}], "default-sort-order-id": 0, "refs": {}, "format-version": 1, "schema": {"fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, "partition-spec": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}""" + expected = """{"location": "s3://bucket/test/location", "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", "last-updated-ms": 1602638573874, "last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}], "current-schema-id": 0, "partition-specs": [{"spec-id": 0, "fields": [{"source-id": 1, "field-id": 1000, "transform": "identity", "name": "x"}]}], "default-spec-id": 0, "last-partition-id": 1000, "properties": {}, "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}], "snapshot-log": [], "metadata-log": [], "sort-orders": [{"order-id": 0, "fields": []}], "default-sort-order-id": 0, "refs": {}, "format-version": 1, "schema": {"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, "partition-spec": 
[{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}""" assert table_metadata == expected def test_serialize_v2(): table_metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2).json() - expected = """{"location": "s3://bucket/test/location", "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590, "last-column-id": 3, "schemas": [{"fields": [{"id": 1, "name": "x", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, {"fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type": "long", "required": true}], "schema-id": 1, "identifier-field-ids": [1, 2]}], "current-schema-id": 1, "partition-specs": [{"spec-id": 0, "fields": [{"source-id": 1, "field-id": 1000, "transform": "identity", "name": "x"}]}], "default-spec-id": 0, "last-partition-id": 1000, "properties": {"read.split.target.size": "134217728"}, "current-snapshot-id": 3055729675574597004, "snapshots": [{"snapshot-id": 3051729675574597004, "sequence-number": 0, "timestamp-ms": 1515100955770, "manifest-list": "s3://a/b/1.avro", "summary": {"operation": "append"}}, {"snapshot-id": 3055729675574597004, "parent-snapshot-id": 3051729675574597004, "sequence-number": 1, "timestamp-ms": 1555100955770, "manifest-list": "s3://a/b/2.avro", "summary": {"operation": "append"}, "schema-id": 1}], "snapshot-log": [{"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}, {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770}], "metadata-log": [], "sort-orders": [{"order-id": 3, "fields": [{"source-id": 2, "transform": "identity", "direction": "asc", "null-order": "nulls-first"}, {"source-id": 3, "transform": "bucket[4]", "direction": "desc", "null-order": "nulls-last"}]}], "default-sort-order-id": 3, "refs": {"main": {"snapshot-id": 3055729675574597004, "type": "branch"}}, "format-version": 2, 
"last-sequence-number": 34}""" + expected = """{"location": "s3://bucket/test/location", "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590, "last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, {"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type": "long", "required": true}], "schema-id": 1, "identifier-field-ids": [1, 2]}], "current-schema-id": 1, "partition-specs": [{"spec-id": 0, "fields": [{"source-id": 1, "field-id": 1000, "transform": "identity", "name": "x"}]}], "default-spec-id": 0, "last-partition-id": 1000, "properties": {"read.split.target.size": "134217728"}, "current-snapshot-id": 3055729675574597004, "snapshots": [{"snapshot-id": 3051729675574597004, "sequence-number": 0, "timestamp-ms": 1515100955770, "manifest-list": "s3://a/b/1.avro", "summary": {"operation": "append"}}, {"snapshot-id": 3055729675574597004, "parent-snapshot-id": 3051729675574597004, "sequence-number": 1, "timestamp-ms": 1555100955770, "manifest-list": "s3://a/b/2.avro", "summary": {"operation": "append"}, "schema-id": 1}], "snapshot-log": [{"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}, {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770}], "metadata-log": [], "sort-orders": [{"order-id": 3, "fields": [{"source-id": 2, "transform": "identity", "direction": "asc", "null-order": "nulls-first"}, {"source-id": 3, "transform": "bucket[4]", "direction": "desc", "null-order": "nulls-last"}]}], "default-sort-order-id": 3, "refs": {"main": {"snapshot-id": 3055729675574597004, "type": "branch"}}, "format-version": 2, "last-sequence-number": 34}""" assert table_metadata == expected @@ -505,6 +505,7 @@ def test_v1_write_metadata_for_v2(): ], "identifier-field-ids": [], 
def test_metadata_v1():
    """A complete, valid V1 metadata payload must parse without raising."""
    payload = {
        "format-version": 1,
        "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
        "location": "s3://tabular-wh-us-west-2-dev/8bcb0838-50fc-472d-9ddb-8feb89ef5f1e/bf289591-dcc0-4234-ad4f-5c3eed811a29",
        "last-updated-ms": 1657810967051,
        "last-column-id": 3,
        "schema": {
            "type": "struct",
            "schema-id": 0,
            "identifier-field-ids": [2],
            "fields": [
                {"id": 1, "name": "foo", "required": False, "type": "string"},
                {"id": 2, "name": "bar", "required": True, "type": "int"},
                {"id": 3, "name": "baz", "required": False, "type": "boolean"},
            ],
        },
        "current-schema-id": 0,
        "schemas": [
            {
                "type": "struct",
                "schema-id": 0,
                "identifier-field-ids": [2],
                "fields": [
                    {"id": 1, "name": "foo", "required": False, "type": "string"},
                    {"id": 2, "name": "bar", "required": True, "type": "int"},
                    {"id": 3, "name": "baz", "required": False, "type": "boolean"},
                ],
            }
        ],
        "partition-spec": [],
        "default-spec-id": 0,
        "partition-specs": [{"spec-id": 0, "fields": []}],
        "last-partition-id": 999,
        "default-sort-order-id": 0,
        "sort-orders": [{"order-id": 0, "fields": []}],
        "properties": {
            "write.delete.parquet.compression-codec": "zstd",
            "write.metadata.compression-codec": "gzip",
            "write.summary.partition-limit": "100",
            "write.parquet.compression-codec": "zstd",
        },
        "current-snapshot-id": -1,
        "refs": {},
        "snapshots": [],
        "snapshot-log": [],
        "metadata-log": [],
    }
    # Construction alone is the assertion: pydantic validation raises on bad input.
    TableMetadataV1(**payload)
b/python/tests/test_schema.py @@ -406,13 +406,13 @@ def get(self, pos: int) -> Any: def test_serialize_schema(table_schema_simple: Schema): actual = table_schema_simple.json() - expected = """{"fields": [{"id": 1, "name": "foo", "type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int", "required": true}, {"id": 3, "name": "baz", "type": "boolean", "required": false}], "schema-id": 1, "identifier-field-ids": [1]}""" + expected = """{"type": "struct", "fields": [{"id": 1, "name": "foo", "type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int", "required": true}, {"id": 3, "name": "baz", "type": "boolean", "required": false}], "schema-id": 1, "identifier-field-ids": [2]}""" assert actual == expected def test_deserialize_schema(table_schema_simple: Schema): actual = Schema.parse_raw( - """{"fields": [{"id": 1, "name": "foo", "type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int", "required": true}, {"id": 3, "name": "baz", "type": "boolean", "required": false}], "schema-id": 1, "identifier-field-ids": [1]}""" + """{"type": "struct", "fields": [{"id": 1, "name": "foo", "type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int", "required": true}, {"id": 3, "name": "baz", "type": "boolean", "required": false}], "schema-id": 1, "identifier-field-ids": [2]}""" ) expected = table_schema_simple assert actual == expected diff --git a/python/tests/utils/test_bin_packing.py b/python/tests/utils/test_bin_packing.py index 71fd53c02762..5447dfe9284f 100644 --- a/python/tests/utils/test_bin_packing.py +++ b/python/tests/utils/test_bin_packing.py @@ -19,7 +19,6 @@ import pytest -from pyiceberg.schema import Schema from pyiceberg.utils.bin_packing import PackingIterator @@ -82,17 +81,3 @@ def weight_func(x): return x assert list(PackingIterator(splits, target_weight, lookback, weight_func, largest_bin_first)) == expected_lists - - -def test_serialize_schema(table_schema_simple: Schema): - actual = 
table_schema_simple.json() - expected = """{"fields": [{"id": 1, "name": "foo", "type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int", "required": true}, {"id": 3, "name": "baz", "type": "boolean", "required": false}], "schema-id": 1, "identifier-field-ids": [1]}""" - assert actual == expected - - -def test_deserialize_schema(table_schema_simple: Schema): - actual = Schema.parse_raw( - """{"fields": [{"id": 1, "name": "foo", "type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int", "required": true}, {"id": 3, "name": "baz", "type": "boolean", "required": false}], "schema-id": 1, "identifier-field-ids": [1]}""" - ) - expected = table_schema_simple - assert actual == expected