From 4592368c2db3cc54f2e23494375c7037e31f3430 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 10:35:33 -0800 Subject: [PATCH 1/2] chore(deps): bump nltk from 3.8.1 to 3.9.1 (#43) Co-authored-by: Aaron Steers Co-authored-by: Aldo Gonzalez <168454423+aldogonzalez8@users.noreply.github.com> --- .gitignore | 1 + .../file_types/unstructured_parser.py | 88 ++++++-- poetry.lock | 198 ++++++++---------- pyproject.toml | 4 +- .../test_connector_builder_handler.py | 29 +-- .../backoff_strategies/test_header_helper.py | 6 +- .../test_wait_time_from_header.py | 2 +- 7 files changed, 181 insertions(+), 147 deletions(-) diff --git a/.gitignore b/.gitignore index fe51d38a0..b95d9039b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ dist .mypy_cache .venv .pytest_cache +.idea **/__pycache__ diff --git a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py index e397ceae2..972240dc6 100644 --- a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py @@ -29,16 +29,25 @@ from airbyte_cdk.utils import is_cloud_environment from airbyte_cdk.utils.traced_exception import AirbyteTracedException from unstructured.file_utils.filetype import ( + EXT_TO_FILETYPE, FILETYPE_TO_MIMETYPE, STR_TO_FILETYPE, FileType, detect_filetype, ) +import nltk unstructured_partition_pdf = None unstructured_partition_docx = None unstructured_partition_pptx = None +try: + nltk.data.find("tokenizers/punkt.zip") + nltk.data.find("tokenizers/punkt_tab.zip") +except LookupError: + nltk.download("punkt") + nltk.download("punkt_tab") + def optional_decode(contents: Union[str, bytes]) -> str: if isinstance(contents, bytes): @@ -108,9 +117,11 @@ async def infer_schema( format = _extract_format(config) with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle: filetype = self._get_filetype(file_handle, file) - if filetype not in self._supported_file_types() and not format.skip_unprocessable_files: - raise self._create_parse_error(file, self._get_file_type_error_message(filetype)) + raise self._create_parse_error( + file, + self._get_file_type_error_message(filetype), + ) return { "content": { @@ -159,6 +170,10 @@ def parse_records( logger.warn(f"File {file.uri} cannot be parsed. Skipping it.") else: raise e + except Exception as e: + exception_str = str(e) + logger.error(f"File {file.uri} caused an error during parsing: {exception_str}.") + raise e def _read_file( self, @@ -176,20 +191,32 @@ def _read_file( # check whether unstructured library is actually available for better error message and to ensure proper typing (can't be None after this point) raise Exception("unstructured library is not available") - filetype = self._get_filetype(file_handle, remote_file) + filetype: FileType | None = self._get_filetype(file_handle, remote_file) - if filetype == FileType.MD or filetype == FileType.TXT: + if filetype is None or filetype not in self._supported_file_types(): + raise self._create_parse_error( + remote_file, + self._get_file_type_error_message(filetype), + ) + if filetype in {FileType.MD, FileType.TXT}: file_content: bytes = file_handle.read() decoded_content: str = optional_decode(file_content) return decoded_content - if filetype not in self._supported_file_types(): - raise self._create_parse_error(remote_file, self._get_file_type_error_message(filetype)) if format.processing.mode == "local": - return self._read_file_locally(file_handle, filetype, format.strategy, remote_file) + return self._read_file_locally( + file_handle, + filetype, + format.strategy, + remote_file, + ) elif format.processing.mode == "api": try: result: str = self._read_file_remotely_with_retries( - file_handle, format.processing, filetype, format.strategy, remote_file + file_handle, + format.processing, + filetype, + format.strategy, + remote_file, ) except Exception as e: # If a parser error happens during remotely processing the file, this means the file is corrupted. This case is handled by the parse_records method, so just rethrow. @@ -336,7 +363,11 @@ def _read_file_locally( return self._render_markdown([element.to_dict() for element in elements]) - def _create_parse_error(self, remote_file: RemoteFile, message: str) -> RecordParseError: + def _create_parse_error( + self, + remote_file: RemoteFile, + message: str, + ) -> RecordParseError: return RecordParseError( FileBasedSourceError.ERROR_PARSING_RECORD, filename=remote_file.uri, message=message ) @@ -360,32 +391,51 @@ def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileT # detect_filetype is either using the file name or file content # if possible, try to leverage the file name to detect the file type # if the file name is not available, use the file content - file_type = detect_filetype( - filename=remote_file.uri, - ) - if file_type is not None and not file_type == FileType.UNK: + file_type: FileType | None = None + try: + file_type = detect_filetype( + filename=remote_file.uri, + ) + except Exception: + # Path doesn't exist locally. Try something else... + pass + + if file_type and file_type != FileType.UNK: return file_type type_based_on_content = detect_filetype(file=file) + file.seek(0) # detect_filetype is reading to read the file content, so we need to reset - # detect_filetype is reading to read the file content - file.seek(0) + if type_based_on_content and type_based_on_content != FileType.UNK: + return type_based_on_content - return type_based_on_content + extension = "." + remote_file.uri.split(".")[-1].lower() + if extension in EXT_TO_FILETYPE: + return EXT_TO_FILETYPE[extension] + + return None def _supported_file_types(self) -> List[Any]: return [FileType.MD, FileType.PDF, FileType.DOCX, FileType.PPTX, FileType.TXT] - def _get_file_type_error_message(self, file_type: FileType) -> str: + def _get_file_type_error_message( + self, + file_type: FileType | None, + ) -> str: supported_file_types = ", ".join([str(type) for type in self._supported_file_types()]) - return f"File type {file_type} is not supported. Supported file types are {supported_file_types}" + return f"File type {file_type or 'None'!s} is not supported. Supported file types are {supported_file_types}" def _render_markdown(self, elements: List[Any]) -> str: return "\n\n".join((self._convert_to_markdown(el) for el in elements)) def _convert_to_markdown(self, el: Dict[str, Any]) -> str: if dpath.get(el, "type") == "Title": - heading_str = "#" * (dpath.get(el, "metadata/category_depth", default=1) or 1) + category_depth = dpath.get(el, "metadata/category_depth", default=1) or 1 + if not isinstance(category_depth, int): + category_depth = ( + int(category_depth) if isinstance(category_depth, (str, float)) else 1 + ) + heading_str = "#" * category_depth return f"{heading_str} {dpath.get(el, 'text')}" elif dpath.get(el, "type") == "ListItem": return f"- {dpath.get(el, 'text')}" diff --git a/poetry.lock b/poetry.lock index a9efac1ef..9dffe6a67 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -13,102 +13,87 @@ files = [ [[package]] name = "aiohttp" -version = "3.10.11" +version = "3.11.0" description = "Async http client/server framework (asyncio)" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, - {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, - {file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"}, - {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"}, - {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"}, - {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"}, - {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"}, - {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"}, - {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"}, - {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"}, - {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"}, - {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"}, - {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"}, - {file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"}, - {file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"}, - {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"}, - {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"}, - {file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"}, - {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"}, - {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"}, - {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"}, - {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"}, - {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"}, - {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"}, - {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"}, - {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"}, - {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"}, - {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"}, - {file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"}, - {file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"}, - {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"}, - {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"}, - {file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"}, - {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"}, - {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"}, - {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"}, - {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"}, - {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"}, - {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"}, - {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"}, - {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"}, - {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"}, - {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"}, - {file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"}, - {file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"}, - {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"}, - {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"}, - {file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"}, - {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"}, - {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"}, - {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"}, - {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"}, - {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"}, - {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"}, - {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"}, - {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"}, - {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"}, - {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"}, - {file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"}, - {file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"}, - {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"}, - {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"}, - {file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"}, - {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"}, - {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"}, - {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"}, - {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"}, - {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"}, - {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"}, - {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"}, - {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"}, - {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"}, - {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"}, - {file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"}, - {file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"}, - {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"}, - {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"}, - {file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"}, - {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"}, - {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"}, - {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"}, - {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"}, - {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"}, - {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"}, - {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"}, - {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"}, - {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"}, - {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"}, - {file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"}, - {file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"}, - {file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"}, + {file = "aiohttp-3.11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:024409c1b1d6076d0ed933dcebd7e4fc6f3320a227bfa0c1b6b93a8b5a146f04"}, + {file = "aiohttp-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:62502b8ffee8c6a4b5c6bf99d1de277d42bf51b2fb713975d9b63b560150b7ac"}, + {file = "aiohttp-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c54c635d1f52490cde7ef3a423645167a8284e452a35405d5c7dc1242a8e75c9"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:104ea21994b1403e4c1b398866f1187c1694fa291314ad7216ec1d8ec6b49f38"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04b24497b3baf15035730de5f207ade88a67d4483a5f16ced7ece348933a5b47"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08474e71772a516ba2e2167b4707af8361d2c452b3d8a5364c984f4867869499"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f40380c96dd407dfa84eb2d264e68aa47717b53bdbe210a59cc3c35a4635f195"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1668ef2f3a7ec9881f4b6a917e5f97c87a343fa6b0d5fc826b7b0297ddd0887"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f3bf5c132eb48002bcc3825702d241d35b4e9585009e65e9dcf9c4635d0b7424"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0315978b2a4569e03fb59100f6a7e7d23f718a4521491f5c13d946d37549f3d"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d5cae4cd271e20b7ab757e966cc919186b9f02535418ab36c471a5377ef4deaa"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:31b91ff3a1fcb206a1fa76e0de1f08c9ffb1dc0deb7296fa2618adfe380fc676"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ebf610c37df4f09c71c9bbf8309b4b459107e6fe889ac0d7e16f6e4ebd975f86"}, + {file = "aiohttp-3.11.0-cp310-cp310-win32.whl", hash = "sha256:b40c304ab01e89ad0aeeecf91bbaa6ae3b00e27b796c9e8d50b71a4a7e885cc8"}, + {file = "aiohttp-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd0834e4260eab78671b81d34f110fbaac449563e48d419cec0030d9a8e58693"}, + {file = "aiohttp-3.11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:89a96a0696dc67d548f69cb518c581a7a33cc1f26ab42229dea1709217c9d926"}, + {file = "aiohttp-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6b925c7775ab857bdc1e52e1f5abcae7d18751c09b751aeb641a5276d9b990e"}, + {file = "aiohttp-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7867d0808614f04e78e0a8d5a2c1f8ac6bc626a0c0e2f62be48be6b749e2f8b2"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:229ae13959a5f499d90ffbb4b9eac2255d8599315027d6f7c22fa9803a94d5b1"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62a2f5268b672087c45b33479ba1bb1d5a48c6d76c133cfce3a4f77410c200d1"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a896059b6937d1a22d8ee8377cdcd097bd26cd8c653b8f972051488b9baadee9"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:104deb7873681273c5daa13c41924693df394043a118dae90387d35bc5531788"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae36ae52b0c22fb69fb8b744eff82a20db512a29eafc6e3a4ab43b17215b219d"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7349205bb163318dcc102329d30be59a647a3d24c82c3d91ed35b7e7301ea7e"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9095580806d9ed07c0c29b23364a0b1fb78258ef9f4bddf7e55bac0e475d4edf"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4d218d3eca40196384ad3b481309c56fd60e664128885d1734da0a8aa530d433"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:6533dd06df3d17d1756829b68b365b1583929b54082db8f65083a4184bf68322"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72cd984f7f14e8c01b3e38f18f39ea85dba84e52ea05e37116ba5e2a72eef396"}, + {file = "aiohttp-3.11.0-cp311-cp311-win32.whl", hash = "sha256:c1828e10c3a49e2b234b87600ecb68a92b8a8dcf8b99bca9447f16c4baaa1630"}, + {file = "aiohttp-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:900ff74d78eb580ae4aa5883242893b123a0c442a46570902500f08d6a7e6696"}, + {file = "aiohttp-3.11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f8f0d79b923070f25674e4ea8f3d61c9d89d24d9598d50ff32c5b9b23c79a25b"}, + {file = "aiohttp-3.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:113bf06b029143e94a47c4f36e11a8b7e396e9d1f1fc8cea58e6b7e370cfed38"}, + {file = "aiohttp-3.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3e1ed8d152cccceffb1ee7a2ac227c16372e453fb11b3aeaa56783049b85d3f6"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2e82e515e268b965424ecabebd91834a41b36260b6ef5db015ee12ddb28ef3"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c1c49bc393d854d4421ebc174a0a41f9261f50d3694d8ca277146cbbcfd24ee7"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57e17c6d71f2dc857a8a1d09be1be7802e35d90fb4ba4b06cf1aab6414a57894"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12071dd2cc95ba81e0f2737bebcb98b2a8656015e87772e84e8fb9e635b5da6e"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97056d3422594e0787733ac4c45bef58722d452f4dc6615fee42f59fe51707dd"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ec5efbc872b00ddd85e3904059d274f284cff314e13f48776050ca2c58f451d"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:dd505a1121ad5b666191840b7bd1d8cb917df2647deeca6f3474331b72452362"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:600b1d9f86a130131915e2f2127664311b33902c486b21a747d626f5144b4471"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8c47a0ba6c2b3d3e5715f8338d657badd21f778c6be16701922c65521c5ecfc9"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8b323b5d3aef7dd811424c269322eec58a977c0c8152e650159e47210d900504"}, + {file = "aiohttp-3.11.0-cp312-cp312-win32.whl", hash = "sha256:aabc4e92cb153636d6be54e84dad1b252ddb9aebe077942b6dcffe5e468d476a"}, + {file = "aiohttp-3.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:508cfcc99534b1282595357592d8367b44392b21f6eb5d4dc021f8d0d809e94d"}, + {file = "aiohttp-3.11.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c98a596ac20e8980cc6f34c0c92a113e98eb08f3997c150064d26d2aeb043e5a"}, + {file = "aiohttp-3.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ad14cdc0fba4df31c0f6e06c21928c5b924725cbf60d0ccc5f6e7132636250e9"}, + {file = "aiohttp-3.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:170fb2324826bb9f08055a8291f42192ae5ee2f25b2966c8f0f4537c61d73a7b"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdad66685fcf2ad14ce522cf849d4a025f4fd206d6cfc3f403d9873e4c243b03"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8b95a63a8e8b5f0464bd8b1b0d59d2bec98a59b6aacc71e9be23df6989b3dfb"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7bcfcede95531589295f56e924702cef7f9685c9e4e5407592e04ded6a65bf3"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ecc2fb1a0a9d48cf773add34196cddf7e488e48e9596e090849751bf43098f4"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8fef105113d56e817cb9bcc609667ee461321413a7b972b03f5b4939f40f307c"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d33b4490026968bdc7f0729b9d87a3a6b1e09043557d2fc1c605c6072deb2f11"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6362f50a6f0e5482c4330d2151cb682779230683da0e155c15ec9fc58cb50b6a"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f698aa61879df64425191d41213dfd99efdc1627e6398e6d7aa5c312fac9702"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0e7a0762cc29cd3acd01a4d2b547b3af7956ad230ebb80b529a8e4f3e4740fe8"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b3e4fb7f5354d39490d8209aefdf5830b208d01c7293a2164e404312c3d8bc55"}, + {file = "aiohttp-3.11.0-cp313-cp313-win32.whl", hash = "sha256:6c5a6958f4366496004cf503d847093d464814543f157ef3b738bbf604232415"}, + {file = "aiohttp-3.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:3ed360d6672a9423aad39902a4e9fe305464d20ed7931dbdba30a4625782d875"}, + {file = "aiohttp-3.11.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d1ea006426edf7e1299c52a58b0443158012f7a56fed3515164b60bfcb1503a9"}, + {file = "aiohttp-3.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c5e6a1f8b0268ffa1c84d7c3558724956002ba8361176e76406233e704bbcffb"}, + {file = "aiohttp-3.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:40dc9446cff326672fcbf93efdb8ef7e949824de1097624efe4f61ac7f0d2c43"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b4545e8d96870da9652930c5198366605ff8f982757030e2148cf341e5746b"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:37f8cf3c43f292d9bb3e6760476c2b55b9663a581fad682a586a410c43a7683e"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:329f5059e0bf6983dceebac8e6ed20e75eaff6163b3414f4a4cb59e0d7037672"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ae6f182be72c3531915e90625cc65afce4df8a0fc4988bd52d8a5d5faaeb68"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d664e5f937c08adb7908ea9f391fbf2928a9b09cb412ac0aba602bde9e499e4"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:feca9fafa4385aea6759c171cd25ea82f7375312fca04178dae35331be45e538"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c415b9601ff50709d6050c8a9281733a9b042b9e589265ac40305b875cf9c463"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:91d3991fad8b65e5dbc13cd95669ea689fe0a96ff63e4e64ac24ed724e4f8103"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9231d610754724273a6ac05a1f177979490bfa6f84d49646df3928af2e88cfd5"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4e4e155968040e32c124a89852a1a5426d0e920a35f4331e1b3949037bfe93a3"}, + {file = "aiohttp-3.11.0-cp39-cp39-win32.whl", hash = "sha256:76d6ee8bb132f8ee0fcb0e205b4708ddb6fba524eb515ee168113063d825131b"}, + {file = "aiohttp-3.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:577c7429f8869fa30186fc2c9eee64d75a30b51b61f26aac9725866ae5985cfd"}, + {file = "aiohttp-3.11.0.tar.gz", hash = "sha256:f57a0de48dda792629e7952d34a0c7b81ea336bb9b721391c7c58145b237fe55"}, ] [package.dependencies] @@ -118,7 +103,8 @@ async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.12.0,<2.0" +propcache = ">=0.2.0" +yarl = ">=1.17.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] @@ -2503,13 +2489,13 @@ files = [ [[package]] name = "nltk" -version = "3.8.1" +version = "3.9.1" description = "Natural Language Toolkit" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, - {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, + {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, + {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, ] [package.dependencies] @@ -3254,10 +3240,7 @@ files = [ [package.dependencies] annotated-types = ">=0.6.0" pydantic-core = "2.23.4" -typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, -] +typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] email = ["email-validator (>=2.0.0)"] @@ -4240,11 +4223,6 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, - {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -5252,5 +5230,5 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" -python-versions = "^3.10" -content-hash = "fb563a9483e45cd18a6f508b5d8f7b9a6362a9157a454c686a0fa212b0442021" +python-versions = "^3.10,<3.13" +content-hash = "e7ecc6f9875e1403a581a81c13595e4fed001e649face2bc3e466aa676a71fae" diff --git a/pyproject.toml b/pyproject.toml index 13f2e74c6..392ebd5a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ version = "0.0.0" # Version will be calculated dynamically. enable = true [tool.poetry.dependencies] -python = "^3.10" +python = "^3.10,<3.13" airbyte-protocol-models-dataclasses = "^0.13" backoff = "*" cachetools = "*" @@ -69,7 +69,7 @@ python-snappy = { version = "0.7.3", optional = true } Sphinx = { version = "~4.2", optional = true } sphinx-rtd-theme = { version = "~1.0", optional = true } tiktoken = { version = "0.8.0", optional = true } -nltk = { version = "3.8.1", optional = true } +nltk = { version = "3.9.1", optional = true } # This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen. # airbyte-ci will try to install latest version when --use-local-cdk is used, resulting in the conflict. numpy = "<2" diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 10bd4513b..37b6dc6e6 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -7,6 +7,7 @@ import json import logging import os +from typing import Literal from unittest import mock from unittest.mock import MagicMock, patch @@ -38,6 +39,7 @@ AirbyteMessage, AirbyteMessageSerializer, AirbyteRecordMessage, + AirbyteStateBlob, AirbyteStateMessage, AirbyteStream, AirbyteStreamState, @@ -74,7 +76,8 @@ AirbyteStateMessage( type="STREAM", stream=AirbyteStreamState( - stream_descriptor=StreamDescriptor(name=_stream_name), stream_state={"key": "value"} + stream_descriptor=StreamDescriptor(name=_stream_name), + stream_state=AirbyteStateBlob({"key": "value"}), ), ) ] @@ -84,15 +87,17 @@ type="STREAM", stream=AirbyteStreamState( stream_descriptor=StreamDescriptor(name=_stream_name), - stream_state={ - "states": [ - { - "partition": {"key": "value"}, - "cursor": {"item_id": 0}, - }, - ], - "parent_state": {}, - }, + stream_state=AirbyteStateBlob( + { + "states": [ + { + "partition": {"key": "value"}, + "cursor": {"item_id": 0}, + }, + ], + "parent_state": {}, + } + ), ), ) ] @@ -569,7 +574,7 @@ def test_read(): ) -def test_config_update(): +def test_config_update() -> None: manifest = copy.deepcopy(MANIFEST) manifest["definitions"]["retriever"]["requester"]["authenticator"] = { "type": "OAuthAuthenticator", @@ -632,7 +637,7 @@ def spec(self, logger: logging.Logger) -> ConnectorSpecification: return connector_specification @property - def check_config_against_spec(self): + def check_config_against_spec(self) -> Literal[False]: return False stack_trace = "a stack trace" diff --git a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py index 99af3ca2b..105b6bf50 100644 --- a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +++ b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py @@ -23,21 +23,21 @@ "test_get_numeric_value_with_regex", {"header": "61,60"}, "header", - re.compile("([-+]?\d+)"), + re.compile(r"([-+]?\d+)"), 61, ), # noqa ( "test_get_numeric_value_with_regex_no_header", {"header": "61,60"}, "notheader", - re.compile("([-+]?\d+)"), + re.compile(r"([-+]?\d+)"), None, ), # noqa ( "test_get_numeric_value_with_regex_not_matching", {"header": "abc61,60"}, "header", - re.compile("([-+]?\d+)"), + re.compile(r"([-+]?\d+)"), None, ), # noqa ], diff --git a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py index 6db2f9fdd..d224718a4 100644 --- a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +++ b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py @@ -37,7 +37,7 @@ SOME_BACKOFF_TIME, ), ("test_wait_time_from_header_not_a_number", "wait_time", "61,60", None, None), - ("test_wait_time_from_header_with_regex", "wait_time", "61,60", "([-+]?\d+)", 61), # noqa + ("test_wait_time_from_header_with_regex", "wait_time", "61,60", r"([-+]?\d+)", 61), # noqa ("test_wait_time_fœrom_header_with_regex_no_match", "wait_time", "...", "[-+]?\d+", None), # noqa ("test_wait_time_from_header", "absent_header", None, None, None), ], From 72117aa6fa82ca70dfd2c4da97c7636cfa87a1db Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Mon, 18 Nov 2024 10:51:04 -0800 Subject: [PATCH 2/2] fix(manifest-connectors): resolve parsing errors during version comparisons (#38) --- .github/workflows/connector-tests.yml | 2 + .../manifest_declarative_source.py | 57 +++++++++++-------- .../test_manifest_declarative_source.py | 25 +++++++- 3 files changed, 58 insertions(+), 26 deletions(-) diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml index 8a934d147..329d7be1d 100644 --- a/.github/workflows/connector-tests.yml +++ b/.github/workflows/connector-tests.yml @@ -72,6 +72,8 @@ jobs: cdk_extra: n/a - connector: source-shopify cdk_extra: n/a + - connector: source-chargebee + cdk_extra: n/a # Currently not passing CI (unrelated) # - connector: source-zendesk-support # cdk_extra: n/a diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 82f4dff3a..223cbc0b6 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -5,10 +5,10 @@ import json import logging import pkgutil -import re from copy import deepcopy from importlib import metadata -from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple +from typing import Any, Dict, Iterator, List, Mapping, Optional +from packaging.version import Version, InvalidVersion import yaml from airbyte_cdk.models import ( @@ -245,45 +245,54 @@ def _validate_source(self) -> None: "Validation against json schema defined in declarative_component_schema.yaml schema failed" ) from e - cdk_version = metadata.version("airbyte_cdk") - cdk_major, cdk_minor, cdk_patch = self._get_version_parts(cdk_version, "airbyte-cdk") - manifest_version = self._source_config.get("version") - if manifest_version is None: + cdk_version_str = metadata.version("airbyte_cdk") + cdk_version = self._parse_version(cdk_version_str, "airbyte-cdk") + manifest_version_str = self._source_config.get("version") + if manifest_version_str is None: raise RuntimeError( "Manifest version is not defined in the manifest. This is unexpected since it should be a required field. Please contact support." ) - manifest_major, manifest_minor, manifest_patch = self._get_version_parts( - manifest_version, "manifest" - ) + manifest_version = self._parse_version(manifest_version_str, "manifest") - if cdk_version.startswith("0.0.0"): + if (cdk_version.major, cdk_version.minor, cdk_version.micro) == (0, 0, 0): # Skipping version compatibility check on unreleased dev branch pass - elif cdk_major < manifest_major or ( - cdk_major == manifest_major and cdk_minor < manifest_minor + elif (cdk_version.major, cdk_version.minor) < ( + manifest_version.major, + manifest_version.minor, ): raise ValidationError( - f"The manifest version {manifest_version} is greater than the airbyte-cdk package version ({cdk_version}). Your " + f"The manifest version {manifest_version!s} is greater than the airbyte-cdk package version ({cdk_version!s}). Your " f"manifest may contain features that are not in the current CDK version." ) - elif manifest_major == 0 and manifest_minor < 29: + elif (manifest_version.major, manifest_version.minor) < (0, 29): raise ValidationError( f"The low-code framework was promoted to Beta in airbyte-cdk version 0.29.0 and contains many breaking changes to the " - f"language. The manifest version {manifest_version} is incompatible with the airbyte-cdk package version " - f"{cdk_version} which contains these breaking changes." + f"language. The manifest version {manifest_version!s} is incompatible with the airbyte-cdk package version " + f"{cdk_version!s} which contains these breaking changes." ) @staticmethod - def _get_version_parts(version: str, version_type: str) -> Tuple[int, int, int]: - """ - Takes a semantic version represented as a string and splits it into a tuple of its major, minor, and patch versions. + def _parse_version( + version: str, + version_type: str, + ) -> Version: + """Takes a semantic version represented as a string and splits it into a tuple. + + The fourth part (prerelease) is not returned in the tuple. + + Returns: + Version: the parsed version object """ - version_parts = re.split(r"\.", version) - if len(version_parts) != 3 or not all([part.isdigit() for part in version_parts]): + try: + parsed_version = Version(version) + except InvalidVersion as ex: raise ValidationError( - f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)" - ) - return tuple(int(part) for part in version_parts) # type: ignore # We already verified there were 3 parts and they are all digits + f"The {version_type} version '{version}' is not a valid version format." + ) from ex + else: + # No exception + return parsed_version def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]: # This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source.py b/unit_tests/sources/declarative/test_manifest_declarative_source.py index 19be3a82e..defbe1275 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source.py @@ -644,7 +644,22 @@ def test_source_with_missing_version_fails(self): id="manifest_version_has_invalid_minor_format", ), pytest.param( - "0.29.0", "0.29.0.1", ValidationError, id="manifest_version_has_extra_version_parts" + "0.29.0", + "0.29.0rc1", + None, + id="manifest_version_is_release_candidate", + ), + pytest.param( + "0.29.0rc1", + "0.29.0", + None, + id="cdk_version_is_release_candidate", + ), + pytest.param( + "0.29.0", + "0.29.0.0.3", # packaging library does not complain and the parts are ignored during comparisons. + None, + id="manifest_version_has_extra_version_parts", ), pytest.param( "0.29.0", "5.0", ValidationError, id="manifest_version_has_too_few_version_parts" @@ -655,7 +670,13 @@ def test_source_with_missing_version_fails(self): ], ) @patch("importlib.metadata.version") - def test_manifest_versions(self, version, cdk_version, manifest_version, expected_error): + def test_manifest_versions( + self, + version, + cdk_version, + manifest_version, + expected_error, + ) -> None: # Used to mock the metadata.version() for test scenarios which normally returns the actual version of the airbyte-cdk package version.return_value = cdk_version