From 4c3d23f806f6fa6d7bd755e94c44391f264062ca Mon Sep 17 00:00:00 2001
From: Sokwhan Huh <sokwhan@google.com>
Date: Fri, 10 Jan 2025 18:36:32 -0800
Subject: [PATCH] Add CelOptions for designating Regex program size

Addresses https://github.com/google/cel-java/issues/545

PiperOrigin-RevId: 714298569
---
 .../test/java/dev/cel/bundle/CelImplTest.java | 33 +++++++++++++++++++
 .../main/java/dev/cel/common/CelOptions.java  | 20 +++++++++--
 .../dev/cel/runtime/CelStandardFunctions.java |  5 ++-
 .../java/dev/cel/runtime/RuntimeHelpers.java  | 16 +++++++--
 4 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/bundle/src/test/java/dev/cel/bundle/CelImplTest.java b/bundle/src/test/java/dev/cel/bundle/CelImplTest.java
index db4b60fe1..93ee1c232 100644
--- a/bundle/src/test/java/dev/cel/bundle/CelImplTest.java
+++ b/bundle/src/test/java/dev/cel/bundle/CelImplTest.java
@@ -2032,6 +2032,39 @@ public void program_comprehensionDisabled_throws() throws Exception {
     assertThat(e.getErrorCode()).isEqualTo(CelErrorCode.ITERATION_BUDGET_EXCEEDED);
   }
 
+  @Test
+  public void program_regexProgramSizeUnderLimit_success() throws Exception {
+    Cel cel =
+        standardCelBuilderWithMacros()
+            .setOptions(CelOptions.current().regexMaxProgramSize(7).build())
+            .build();
+    // The pattern below has an RE2J program size of 7, which is exactly the limit. See
+    // https://github.com/google/re2j/blob/84237cbbd0fbd637c6eb6856717c1e248daae729/javatests/com/google/re2j/PatternTest.java#L175 for program size
+    CelAbstractSyntaxTree ast = cel.compile("'foo'.matches('(a+b)')").getAst();
+
+    assertThat(cel.createProgram(ast).eval()).isEqualTo(false);
+  }
+
+  @Test
+  public void program_regexProgramSizeExceedsLimit_throws() throws Exception {
+    Cel cel =
+        standardCelBuilderWithMacros()
+            .setOptions(CelOptions.current().regexMaxProgramSize(6).build())
+            .build();
+    // The pattern below has an RE2J program size of 7, one more than the limit of 6. See
+    // https://github.com/google/re2j/blob/84237cbbd0fbd637c6eb6856717c1e248daae729/javatests/com/google/re2j/PatternTest.java#L175 for program size
+    CelAbstractSyntaxTree ast = cel.compile("'foo'.matches('(a+b)')").getAst();
+
+    CelEvaluationException e =
+        assertThrows(CelEvaluationException.class, () -> cel.createProgram(ast).eval());
+    assertThat(e)
+        .hasMessageThat()
+        .contains(
+            "evaluation error: Regex pattern exceeds allowed program size. Allowed: 6, Provided:"
+                + " 7");
+    assertThat(e.getErrorCode()).isEqualTo(CelErrorCode.INVALID_ARGUMENT);
+  }
+
   @Test
   public void toBuilder_isImmutable() {
     CelBuilder celBuilder = CelFactory.standardCelBuilder();
diff --git a/common/src/main/java/dev/cel/common/CelOptions.java b/common/src/main/java/dev/cel/common/CelOptions.java
index 3c07ab26d..38c8407fe 100644
--- a/common/src/main/java/dev/cel/common/CelOptions.java
+++ b/common/src/main/java/dev/cel/common/CelOptions.java
@@ -39,7 +39,7 @@ public enum ProtoUnsetFieldOptions {
     // Do not bind a field if it is unset. Repeated fields are bound as empty list.
     SKIP,
     // Bind the (proto api) default value for a field.
-    BIND_DEFAULT;
+    BIND_DEFAULT
   }
 
   public static final CelOptions DEFAULT = current().build();
@@ -121,6 +121,8 @@ public enum ProtoUnsetFieldOptions {
 
   public abstract boolean enableComprehension();
 
+  public abstract int regexMaxProgramSize();
+
   public abstract Builder toBuilder();
 
   public ImmutableSet<ExprFeatures> toExprFeatures() {
@@ -218,7 +220,8 @@ public static Builder newBuilder() {
         .enableStringConversion(true)
         .enableStringConcatenation(true)
         .enableListConcatenation(true)
-        .enableComprehension(true);
+        .enableComprehension(true)
+        .regexMaxProgramSize(-1);
   }
 
   /**
@@ -571,6 +574,19 @@ public abstract static class Builder {
      */
     public abstract Builder enableComprehension(boolean value);
 
+    /**
+     * Sets the largest RE2J program size a regex may have; larger patterns fail evaluation.
+     *
+     * <p>The program size is a very approximate measure of a regexp's "cost". Larger numbers are
+     * more expensive than smaller numbers.
+     *
+     * <p>A negative {@code value} disables the check; {@code newBuilder()} defaults to -1.
+     *
+     * <p>There's no guarantee that RE2 program size has the exact same value across other CEL
+     * implementations (C++ and Go).
+     */
+    public abstract Builder regexMaxProgramSize(int value);
+
     public abstract CelOptions build();
   }
 }
diff --git a/runtime/src/main/java/dev/cel/runtime/CelStandardFunctions.java b/runtime/src/main/java/dev/cel/runtime/CelStandardFunctions.java
index bd1fc45f9..dc6ca979b 100644
--- a/runtime/src/main/java/dev/cel/runtime/CelStandardFunctions.java
+++ b/runtime/src/main/java/dev/cel/runtime/CelStandardFunctions.java
@@ -28,7 +28,6 @@
 import com.google.protobuf.Timestamp;
 import com.google.protobuf.util.Durations;
 import com.google.protobuf.util.Timestamps;
-import com.google.re2j.PatternSyntaxException;
 import dev.cel.common.CelErrorCode;
 import dev.cel.common.CelOptions;
 import dev.cel.common.internal.ComparisonFunctions;
@@ -1000,7 +999,7 @@ public enum StringMatchers implements StandardOverload {
                     (String string, String regexp) -> {
                       try {
                         return RuntimeHelpers.matches(string, regexp, bindingHelper.celOptions);
-                      } catch (PatternSyntaxException e) {
+                      } catch (RuntimeException e) {
                         throw new CelEvaluationException(
                             e.getMessage(), e, CelErrorCode.INVALID_ARGUMENT);
                       }
@@ -1015,7 +1014,7 @@ public enum StringMatchers implements StandardOverload {
                     (String string, String regexp) -> {
                       try {
                         return RuntimeHelpers.matches(string, regexp, bindingHelper.celOptions);
-                      } catch (PatternSyntaxException e) {
+                      } catch (RuntimeException e) {
                         throw new CelEvaluationException(
                             e.getMessage(), e, CelErrorCode.INVALID_ARGUMENT);
                       }
diff --git a/runtime/src/main/java/dev/cel/runtime/RuntimeHelpers.java b/runtime/src/main/java/dev/cel/runtime/RuntimeHelpers.java
index ffb979842..76d69f94e 100644
--- a/runtime/src/main/java/dev/cel/runtime/RuntimeHelpers.java
+++ b/runtime/src/main/java/dev/cel/runtime/RuntimeHelpers.java
@@ -74,12 +74,22 @@ public static boolean matches(String string, String regexp) {
   }
 
   public static boolean matches(String string, String regexp, CelOptions celOptions) {
+    Pattern pattern = Pattern.compile(regexp);
+    int maxProgramSize = celOptions.regexMaxProgramSize(); // Negative means no limit.
+    if (maxProgramSize >= 0 && pattern.programSize() > maxProgramSize) {
+      throw new IllegalArgumentException(
+          String.format(
+              "Regex pattern exceeds allowed program size. Allowed: %d, Provided: %d",
+              maxProgramSize, pattern.programSize()));
+    }
+
     if (!celOptions.enableRegexPartialMatch()) {
       // Uses re2 for consistency across languages.
-      return Pattern.matches(regexp, string);
+      return pattern.matcher(string).matches();
     }
-    // Return an unanchored match for the presence of the regexp anywher in the string.
-    return Pattern.compile(regexp).matcher(string).find();
+
+    // Partial match: report whether the regexp matches anywhere in the string (unanchored).
+    return pattern.matcher(string).find();
   }
 
   /** Create a compiled pattern for the given regular expression. */