Skip to content

Commit

Permalink
Used LowerCaseNormalizer
Browse files Browse the repository at this point in the history
  • Loading branch information
tinomerl committed Dec 4, 2024
1 parent fddfa1b commit ee76045
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions airbyte/_writers/file_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from __future__ import annotations

import abc
import re
from collections import defaultdict
from pathlib import Path
from typing import IO, TYPE_CHECKING, final
Expand All @@ -14,6 +13,7 @@
from airbyte import exceptions as exc
from airbyte import progress
from airbyte._batch_handles import BatchHandle
from airbyte._util.name_normalizers import LowerCaseNormalizer
from airbyte._writers.base import AirbyteWriterInterface
from airbyte.records import StreamRecord, StreamRecordHandler

Expand Down Expand Up @@ -64,11 +64,12 @@ def _get_new_cache_file_path(
target_dir.mkdir(parents=True, exist_ok=True)
# If a stream name contains a special character, the temporary jsonl.gz
# file can't be created because of OS restrictions. Therefore, we
# remove the special characters.
# remove the special characters, using the `LowerCaseNormalizer`.
# Specifically: we remove any of these characters: `<>:"/\|?*`
# and we remove characters in the ASCII range from 0 to 31.
cleaned_stream_name = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "", stream_name)
return target_dir / f"{cleaned_stream_name}_{batch_id}{self.default_cache_file_suffix}"
normalizer = LowerCaseNormalizer()
normalized_stream_name = normalizer.normalize(stream_name)
return target_dir / f"{normalized_stream_name}_{batch_id}{self.default_cache_file_suffix}"

def _open_new_file(
self,
Expand Down

0 comments on commit ee76045

Please sign in to comment.