diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 685e8e16e5f8..f7c143d3049a 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -378,7 +378,12 @@ # any tokenizer/feature_extractor might be use for a given model so we cannot # use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to # see if the model defines such objects or not. -MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"} +MULTI_MODEL_CONFIGS = { + "SpeechEncoderDecoderConfig", + "VisionEncoderDecoderConfig", + "VisionTextDualEncoderConfig", + "LayoutLMConfig", +} for task, values in SUPPORTED_TASKS.items(): if values["type"] == "text": NO_FEATURE_EXTRACTOR_TASKS.add(task) diff --git a/tests/pipelines/test_pipelines_object_detection.py b/tests/pipelines/test_pipelines_object_detection.py index 23a6dab29952..570026101f1f 100644 --- a/tests/pipelines/test_pipelines_object_detection.py +++ b/tests/pipelines/test_pipelines_object_detection.py @@ -256,17 +256,9 @@ def test_layoutlm(self): "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png" ) self.assertEqual( - nested_simplify(outputs, decimals=4), + nested_simplify(outputs, decimals=3), [ - { - "score": 0.9982, - "label": "B-QUESTION", - "box": {"xmin": 654, "ymin": 165, "xmax": 719, "ymax": 719}, - }, - { - "score": 0.9982, - "label": "I-QUESTION", - "box": {"xmin": 691, "ymin": 202, "xmax": 735, "ymax": 735}, - }, + {"score": 0.998, "label": "B-QUESTION", "box": {"xmin": 462, "ymin": 234, "xmax": 508, "ymax": 249}}, + {"score": 0.999, "label": "I-QUESTION", "box": {"xmin": 489, "ymin": 286, "xmax": 519, "ymax": 301}}, ], )