src/transformers/models/wav2vec2/tokenization_wav2vec2.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -603,7 +603,7 @@ def decode(
  
            >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")

            >>> # load first sample of English common_voice

-           >>> dataset = load_dataset("common_voice", "en", split="train", streaming=True)

+           >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)

            >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))

            >>> dataset_iter = iter(dataset)

            >>> sample = next(dataset_iter)

    @@ -626,10 +626,10 @@ def decode(
  
            ...     }

            ...     for d in outputs.word_offsets

            ... ]

-           >>> # compare word offsets with audio `common_voice_en_100038.mp3` online on the dataset viewer:

-           >>> # https://huggingface.co/datasets/common_voice/viewer/en/train

+           >>> # compare word offsets with audio `en_train_0/common_voice_en_19121553.mp3` online on the dataset viewer:

+           >>> # https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/viewer/en

            >>> word_offsets[:3]

-           [{'word': 'WHY', 'start_time': 1.42, 'end_time': 1.54}, {'word': 'DOES', 'start_time': 1.64, 'end_time': 1.9}, {'word': 'MILISANDRA', 'start_time': 2.26, 'end_time': 2.9}]

+           [{'word': 'THE', 'start_time': 0.7, 'end_time': 0.78}, {'word': 'TRICK', 'start_time': 0.88, 'end_time': 1.08}, {'word': 'APPEARS', 'start_time': 1.2, 'end_time': 1.64}]

            ```"""

            # Convert inputs to python lists

            token_ids = to_py_obj(token_ids)

src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -538,7 +538,7 @@ def decode(
  
            >>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")

            >>> # load first sample of English common_voice

-           >>> dataset = load_dataset("common_voice", "en", split="train", streaming=True)

+           >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)

            >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))

            >>> dataset_iter = iter(dataset)

            >>> sample = next(dataset_iter)

    @@ -561,10 +561,10 @@ def decode(
  
            ...     }

            ...     for d in outputs.word_offsets

            ... ]

-           >>> # compare word offsets with audio `common_voice_en_100038.mp3` online on the dataset viewer:

-           >>> # https://huggingface.co/datasets/common_voice/viewer/en/train

+           >>> # compare word offsets with audio `en_train_0/common_voice_en_19121553.mp3` online on the dataset viewer:

+           >>> # https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/viewer/en

            >>> word_offsets[:4]

-           [{'word': 'WHY', 'start_time': 1.42, 'end_time': 1.54}, {'word': 'DOES', 'start_time': 1.66, 'end_time': 1.9}, {'word': 'MILISANDRA', 'start_time': 2.26, 'end_time': 2.9}, {'word': 'LOOK', 'start_time': 3.0, 'end_time': 3.16}]

+           [{'word': 'THE', 'start_time': 0.68, 'end_time': 0.78}, {'word': 'TRACK', 'start_time': 0.88, 'end_time': 1.1}, {'word': 'APPEARS', 'start_time': 1.18, 'end_time': 1.66}, {'word': 'ON', 'start_time': 1.86, 'end_time': 1.92}]

            ```"""

            from pyctcdecode.constants import (

Fix 2 Wav2Vec2 related models' doctest #27462

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

ydshieh merged 5 commits into main from fix_wav2vec2_doctest

Nov 13, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix 2 Wav2Vec2 related models' doctest #27462

Uh oh!

Diff view

Diff view

There are no files selected for viewing