huggingface
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/lighteval/metrics/dynamic_metrics.py‎
Lines changed: 15 additions & 4 deletions b/‎src/lighteval/metrics/dynamic_metrics.py‎
Lines changed: 15 additions & 4 deletions
@@ -109,7 +109,7 @@ multilingual = [
     "jieba", # for chinese tokenizer
     "pyvi", # for vietnamese tokenizer
 ]
-math = ["latex2sympy2_extended>=0.9.3"]
+math = ["latex2sympy2_extended==1.0.4"]
 
 [project.urls]
 Homepage = "https://github.com/huggingface/lighteval"
 
@@ -193,6 +193,7 @@ def multilingual_extractive_match_metric(
     fallback_mode: Literal["no_fallback", "first_match"] = "first_match",
     extraction_mode: Literal["first_match", "any_match"] = "any_match",
     precision: int = 6,
+    timeout_seconds: int = 5,
 ) -> SampleLevelMetric:
     """Creates a language-aware extractive match metric that extracts answers from the model's output.
 
@@ -222,6 +223,8 @@ def multilingual_extractive_match_metric(
 
         precision: int
             Number of decimal places to use when comparing numerical values. Defaults to 6.
+        timeout_seconds: int
+            Timeout for the extraction (each attempt) and comparison. Defaults to 5.
 
     Returns:
         A sample level metric that extracts and compares mathematical expressions.
@@ -245,11 +248,12 @@ def sample_level_fn(golds: list[str], predictions: list[str], formatted_doc: Doc
         pred_extraction_regexes = get_extraction_regexes(formatted_doc, pred_extraction_target, language)
 
         extracted_predictions = [
-            extract_target_from_pred(pred, pred_extraction_regexes, fallback_mode, extraction_mode)
+            extract_target_from_pred(pred, pred_extraction_regexes, fallback_mode, extraction_mode, timeout_seconds)
             for pred in predictions
         ]
         extracted_golds = [
-            extract_target_from_pred(gold, gold_extraction_regexes, fallback_mode, extraction_mode) for gold in golds
+            extract_target_from_pred(gold, gold_extraction_regexes, fallback_mode, extraction_mode, timeout_seconds)
+            for gold in golds
         ]
 
         # Assert on empty gold and warn on empty pred
@@ -265,12 +269,19 @@ def sample_level_fn(golds: list[str], predictions: list[str], formatted_doc: Doc
         # We have to use timeout because the sypmy to str conversion can be very slow
         try:
             add_to_specifics_with_timeout(formatted_doc, extracted_predictions, extracted_golds)
-        except:  # noqa: E722
+        except Exception:  # noqa: E722
             logger.warning("Timeout when adding extracted predictions and golds to specific")
 
         return aggregation_function(
             [
-                (1.0 if any(compare_gold_target(gold, pred, precision) for gold in extracted_golds) else 0.0)
+                (
+                    1.0
+                    if any(
+                        compare_gold_target(gold, pred, precision, timeout_seconds=timeout_seconds)
+                        for gold in extracted_golds
+                    )
+                    else 0.0
+                )
                 for pred in extracted_predictions
             ]
         )
Original file line number	Diff line number	Diff line change
`@@ -109,7 +109,7 @@ multilingual = [`
`109`	`109`	`"jieba", # for chinese tokenizer`
`110`	`110`	`"pyvi", # for vietnamese tokenizer`
`111`	`111`	`]`
`112`		`-math = ["latex2sympy2_extended>=0.9.3"]`
	`112`	`+math = ["latex2sympy2_extended==1.0.4"]`
`113`	`113`
`114`	`114`	`[project.urls]`
`115`	`115`	`Homepage = "https://github.com/huggingface/lighteval"`