From 4053d7684c6c1cc630a892c47b57e6075b3cd951 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 18 Dec 2023 10:21:09 -0500 Subject: [PATCH] Preserve regex captures across stack frames (#1447) * privatize state.RegexCaptures * stack frame for regex captures * merge * unit-test case * docs re stack frames for regex captures * more --- docs/src/data-diving-examples.md | 46 +++++++-------- docs/src/reference-dsl-builtin-functions.md | 18 +----- .../src/reference-main-regular-expressions.md | 20 ++++++- .../reference-main-regular-expressions.md.in | 20 ++++++- docs/src/reference-verbs.md | 38 ++++++------- docs/src/two-pass-algorithms.md | 4 +- pkg/dsl/cst/builtin_functions.go | 2 +- pkg/dsl/cst/leaves.go | 4 +- pkg/dsl/cst/udf.go | 2 + pkg/dsl/cst/uds.go | 2 + pkg/lib/util.go | 3 + pkg/runtime/state.go | 57 +++++++++++++++---- test/cases/dsl-regex-matching/0017/cmd | 1 + test/cases/dsl-regex-matching/0017/experr | 0 test/cases/dsl-regex-matching/0017/expout | 6 ++ test/cases/dsl-regex-matching/0017/mlr | 15 +++++ test/cases/dsl-regex-matching/0018/cmd | 1 + test/cases/dsl-regex-matching/0018/experr | 0 test/cases/dsl-regex-matching/0018/expout | 6 ++ test/cases/dsl-regex-matching/0018/mlr | 15 +++++ 20 files changed, 183 insertions(+), 77 deletions(-) create mode 100644 test/cases/dsl-regex-matching/0017/cmd create mode 100644 test/cases/dsl-regex-matching/0017/experr create mode 100644 test/cases/dsl-regex-matching/0017/expout create mode 100644 test/cases/dsl-regex-matching/0017/mlr create mode 100644 test/cases/dsl-regex-matching/0018/cmd create mode 100644 test/cases/dsl-regex-matching/0018/experr create mode 100644 test/cases/dsl-regex-matching/0018/expout create mode 100644 test/cases/dsl-regex-matching/0018/mlr diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193d..100716ec26 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 
483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index d391e83419..8c3b496407 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -75,7 +75,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [match](#match), [matchx](#matchx), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1296,22 +1296,6 @@ lstrip (class=string #args=1) Strip leading whitespace from string. -### match -
-match  (class=string #args=2) TODO: WRITE ME
-Example:
-TODO: WRITE ME
-
- - -### matchx -
-matchx  (class=string #args=2) TODO: WRITE ME
-Example:
-TODO: WRITE ME
-
- - ### regextract
 regextract  (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md
index f15b55f596..c221c48dec 100644
--- a/docs/src/reference-main-regular-expressions.md
+++ b/docs/src/reference-main-regular-expressions.md
@@ -63,7 +63,7 @@ name=bull,regex=^b[ou]ll$
 
 ## Regex captures
 
-Regex captures of the form `\0` through `\9` are supported as
+Regex captures of the form `\0` through `\9` are supported as follows:
 
 * Captures have in-function context for `sub` and `gsub`. For example, the first `\1,\2` pair belong to the first `sub` and the second `\1,\2` pair belong to the second `sub`:
 
@@ -77,6 +77,24 @@ Regex captures of the form `\0` through `\9` are supported as
 mlr put '$a =~ "(..)_(....); $b = "left_\1"; $c = "right_\2"'
 
+* Each user-defined function has its own frame for captures. For example:
+
+
+mlr -n put '
+func f() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        f();
+        print "OUTER POST: \1 \2";
+    }
+}'
+
+
 * The captures are not retained across multiple puts. For example, here the `\1,\2` won't be expanded from the regex capture:
diff --git a/docs/src/reference-main-regular-expressions.md.in b/docs/src/reference-main-regular-expressions.md.in
index e81f245528..c2fc7b0499 100644
--- a/docs/src/reference-main-regular-expressions.md.in
+++ b/docs/src/reference-main-regular-expressions.md.in
@@ -38,7 +38,7 @@ GENMD-EOF
 
 ## Regex captures
 
-Regex captures of the form `\0` through `\9` are supported as
+Regex captures of the form `\0` through `\9` are supported as follows:
 
 * Captures have in-function context for `sub` and `gsub`. For example, the first `\1,\2` pair belong to the first `sub` and the second `\1,\2` pair belong to the second `sub`:
 
@@ -52,6 +52,24 @@ GENMD-SHOW-COMMAND
 mlr put '$a =~ "(..)_(....); $b = "left_\1"; $c = "right_\2"'
 GENMD-EOF
 
+* Each user-defined function has its own frame for captures. For example:
+
+GENMD-SHOW-COMMAND
+mlr -n put '
+func f() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        f();
+        print "OUTER POST: \1 \2";
+    }
+}'
+GENMD-EOF
+
 * The captures are not retained across multiple puts. For example, here the `\1,\2` won't be expanded from the regex capture:
 
 GENMD-SHOW-COMMAND
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 89bbc2b71f..106ad4bf17 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -3406,14 +3406,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3422,12 +3422,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3513,11 +3513,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3794,9 +3794,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e1..e475aebf3b 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go
index 397e7869cd..ef5a6fb98e 100644
--- a/pkg/dsl/cst/builtin_functions.go
+++ b/pkg/dsl/cst/builtin_functions.go
@@ -450,7 +450,7 @@ func (node *RegexCaptureBinaryFunctionCallsiteNode) Evaluate(
 		node.evaluable1.Evaluate(state),
 		node.evaluable2.Evaluate(state),
 	)
-	state.RegexCaptures = captures
+	state.SetRegexCaptures(captures)
 	return output
 }
 
diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go
index c0b4d88757..0e3621d7de 100644
--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@@ -293,7 +293,7 @@ func (node *StringLiteralNode) Evaluate(
 //	}
 //
 // the captures can be set (by =~ or !=~) quite far from where they are used.
-// This is why we consult the state.RegexCaptures here, to see if they've been
+// This is why we consult the state's regex captures here, to see if they've been
 // set on some previous invocation of =~ or !=~.
 func (node *RegexCaptureReplacementNode) Evaluate(
 	state *runtime.State,
@@ -302,7 +302,7 @@ func (node *RegexCaptureReplacementNode) Evaluate(
 		lib.InterpolateCaptures(
 			node.replacementString,
 			node.replacementCaptureMatrix,
-			state.RegexCaptures,
+			state.GetRegexCaptures(),
 		),
 	)
 }
diff --git a/pkg/dsl/cst/udf.go b/pkg/dsl/cst/udf.go
index 9be4bf59c0..042366afce 100644
--- a/pkg/dsl/cst/udf.go
+++ b/pkg/dsl/cst/udf.go
@@ -223,6 +223,8 @@ func (site *UDFCallsite) EvaluateWithArguments(
 		state.Stack.PushStackFrameSet()
 		defer state.Stack.PopStackFrameSet()
 	}
+	state.PushRegexCapturesFrame()
+	defer state.PopRegexCapturesFrame()
 
 	cacheable := !udf.isFunctionLiteral
 
diff --git a/pkg/dsl/cst/uds.go b/pkg/dsl/cst/uds.go
index 3a72e4c238..2ed14fa564 100644
--- a/pkg/dsl/cst/uds.go
+++ b/pkg/dsl/cst/uds.go
@@ -120,6 +120,8 @@ func (site *UDSCallsite) Execute(state *runtime.State) (*BlockExitPayload, error
 	// Bind the arguments to the parameters
 	state.Stack.PushStackFrameSet()
 	defer state.Stack.PopStackFrameSet()
+	state.PushRegexCapturesFrame()
+	defer state.PopRegexCapturesFrame()
 
 	for i := range arguments {
 		err := state.Stack.DefineTypedAtScope(
diff --git a/pkg/lib/util.go b/pkg/lib/util.go
index 4a8faa86df..d78809d215 100644
--- a/pkg/lib/util.go
+++ b/pkg/lib/util.go
@@ -209,6 +209,9 @@ func WriteTempFileOrDie(contents string) string {
 }
 
 func CopyStringArray(input []string) []string {
+	if input == nil {
+		return nil
+	}
 	output := make([]string, len(input))
 	copy(output, input)
 	return output
diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go
index 820f40c3dd..cfd9e11a7d 100644
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@@ -25,27 +25,42 @@ type State struct {
 
 	// For holding "\0".."\9" between where they are set via things like
 	// '$x =~ "(..)_(...)"', and interpolated via things like '$y = "\2:\1"'.
-	RegexCaptures []string
-	Options       *cli.TOptions
+	//
+	// Top-level code uses the base frame; each user-defined function or subroutine call gets its own.
+	//
+	// For example, in function `f()`, one can do `somevar =~ someregex`, then
+	// call some function `g()` which also uses `=~`, and then when `g()` returns,
+	// `f()` will have its "\1", "\2", etc intact.
+	//
+	// This is necessary for the stateful semantics of `=~` and "\1", "\2", etc.
+	// Those are avoided when the user calls `matchx`, which is newer, and
+	// stateless. However, `=~` exists in the Miller DSL and we must support it.
+	regexCapturesByFrame *list.List // list of []string
+
+	Options *cli.TOptions
 
 	// StrictMode allows for runtime handling of absent-reads and untyped assignments.
 	StrictMode bool
 }
 
 func NewEmptyState(options *cli.TOptions, strictMode bool) *State {
+
+	// See lib.MakeEmptyCaptures for context.
+	regexCapturesByFrame := list.New()
+	regexCapturesByFrame.PushFront(lib.MakeEmptyCaptures())
+
 	oosvars := mlrval.NewMlrmap()
 	return &State{
-		Inrec:            nil,
-		Context:          nil,
-		Oosvars:          oosvars,
-		FilterExpression: mlrval.TRUE,
-		Stack:            NewStack(),
+		Inrec:                nil,
+		Context:              nil,
+		Oosvars:              oosvars,
+		FilterExpression:     mlrval.TRUE,
+		Stack:                NewStack(),
+		regexCapturesByFrame: regexCapturesByFrame,
 
 		// OutputRecordsAndContexts is assigned after construction
 
-		// See lib.MakeEmptyCaptures for context.
-		RegexCaptures: lib.MakeEmptyCaptures(),
-		Options:       options,
+		Options: options,
 
 		StrictMode: strictMode,
 	}
@@ -57,5 +72,25 @@ func (state *State) Update(
 ) {
 	state.Inrec = inrec
 	state.Context = context
-	state.RegexCaptures = lib.MakeEmptyCaptures()
+	state.regexCapturesByFrame.Front().Value = lib.MakeEmptyCaptures()
+}
+
+func (state *State) SetRegexCaptures(
+	captures []string,
+) {
+	state.regexCapturesByFrame.Front().Value = lib.CopyStringArray(captures)
+}
+
+func (state *State) GetRegexCaptures() []string {
+	regexCaptures := state.regexCapturesByFrame.Front().Value.([]string)
+	return lib.CopyStringArray(regexCaptures)
+}
+
+func (state *State) PushRegexCapturesFrame() {
+	state.regexCapturesByFrame.PushFront(lib.MakeEmptyCaptures())
+}
+
+func (state *State) PopRegexCapturesFrame() {
+	// The list type has no PopFront method, so remove the front element explicitly.
+	state.regexCapturesByFrame.Remove(state.regexCapturesByFrame.Front())
 }
diff --git a/test/cases/dsl-regex-matching/0017/cmd b/test/cases/dsl-regex-matching/0017/cmd
new file mode 100644
index 0000000000..6add080d45
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0017/cmd
@@ -0,0 +1 @@
+mlr -n put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-regex-matching/0017/experr b/test/cases/dsl-regex-matching/0017/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-regex-matching/0017/expout b/test/cases/dsl-regex-matching/0017/expout
new file mode 100644
index 0000000000..860e81046d
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0017/expout
@@ -0,0 +1,6 @@
+OUTER PRE:  123 abc
+OUTER PRE:  123 abc
+INNER: 456 defg
+INNER: 456 defg
+OUTER POST: 123 abc
+OUTER POST: 123 abc
diff --git a/test/cases/dsl-regex-matching/0017/mlr b/test/cases/dsl-regex-matching/0017/mlr
new file mode 100644
index 0000000000..bec25114e2
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0017/mlr
@@ -0,0 +1,15 @@
+func f() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        print "OUTER PRE:  \1 \2";
+        f();
+        print "OUTER POST: \1 \2";
+        print "OUTER POST: \1 \2";
+    }
+}
diff --git a/test/cases/dsl-regex-matching/0018/cmd b/test/cases/dsl-regex-matching/0018/cmd
new file mode 100644
index 0000000000..6add080d45
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0018/cmd
@@ -0,0 +1 @@
+mlr -n put -f ${CASEDIR}/mlr
diff --git a/test/cases/dsl-regex-matching/0018/experr b/test/cases/dsl-regex-matching/0018/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/dsl-regex-matching/0018/expout b/test/cases/dsl-regex-matching/0018/expout
new file mode 100644
index 0000000000..860e81046d
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0018/expout
@@ -0,0 +1,6 @@
+OUTER PRE:  123 abc
+OUTER PRE:  123 abc
+INNER: 456 defg
+INNER: 456 defg
+OUTER POST: 123 abc
+OUTER POST: 123 abc
diff --git a/test/cases/dsl-regex-matching/0018/mlr b/test/cases/dsl-regex-matching/0018/mlr
new file mode 100644
index 0000000000..992fa1d0be
--- /dev/null
+++ b/test/cases/dsl-regex-matching/0018/mlr
@@ -0,0 +1,15 @@
+subr s() {
+    if ("456 defg" =~ "([0-9]+) ([a-z]+)") {
+        print "INNER: \1 \2";
+        print "INNER: \1 \2";
+    }
+}
+end {
+    if ("123 abc" =~ "([0-9]+) ([a-z]+)") {
+        print "OUTER PRE:  \1 \2";
+        print "OUTER PRE:  \1 \2";
+        call s();
+        print "OUTER POST: \1 \2";
+        print "OUTER POST: \1 \2";
+    }
+}