Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2637,19 +2637,6 @@ object functions {
RegExpExtract(e.expr, lit(exp).expr, lit(groupIdx).expr)
}

/**
* Extract all matches of the specified capture group of a Java regex from the specified string
* column, returned as an array of strings.
* If the regex did not match at all, an empty array is returned. If the regex matched but the
* specified group did not take part in that match (e.g. an optional group), the match
* contributes an empty string to the array.
* If the specified group index exceeds the group count of regex, an IllegalArgumentException
* will be thrown.
*
* @param e the string column to search
* @param exp the Java regex pattern, passed to the expression as a literal
* @param groupIdx index of the capture group to extract from each match, passed as a literal
* @return a column holding the array of extracted group values
*
* @group string_funcs
* @since 3.1.0
*/
def regexp_extract_all(e: Column, exp: String, groupIdx: Int): Column = withExpr {
RegExpExtractAll(e.expr, lit(exp).expr, lit(groupIdx).expr)
}

/**
* Replace all substrings of the specified string value that match regexp with rep.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,25 +154,9 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
Row("300", "100") :: Row("400", "100") :: Row("400-400", "100") :: Nil)
}

// regexp_extract_all must return, per row, every occurrence of the requested capture group.
test("string regex_extract_all") {
  val rangePattern = "(\\d+)-(\\d+)"

  // Column "b" carries a pattern per row but is not read by the assertion below.
  val ranges = Seq(
    ("100-200,300-400", rangePattern),
    ("101-201,301-401", rangePattern),
    ("102-202,302-402", "(\\d+)")).toDF("a", "b")

  // Group 1 is the number before the dash, group 2 the number after it.
  val lowerBounds = regexp_extract_all($"a", rangePattern, 1)
  val upperBounds = regexp_extract_all($"a", rangePattern, 2)

  val expected = Seq(
    Row(Seq("100", "300"), Seq("200", "400")),
    Row(Seq("101", "301"), Seq("201", "401")),
    Row(Seq("102", "302"), Seq("202", "402")))

  checkAnswer(ranges.select(lowerBounds, upperBounds), expected)
}

test("non-matching optional group") {
val df = Seq(Tuple1("aaaac")).toDF("s")

// regexp_extract
checkAnswer(
df.select(regexp_extract($"s", "(foo)", 1)),
Row("")
Expand All @@ -181,16 +165,6 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
df.select(regexp_extract($"s", "(a+)(b)?(c)", 2)),
Row("")
)

// regexp_extract_all
checkAnswer(
df.select(regexp_extract_all($"s", "(foo)", 1)),
Row(Seq())
)
checkAnswer(
df.select(regexp_extract_all($"s", "(a+)(b)?(c)", 2)),
Row(Seq(""))
)
}

test("string ascii function") {
Expand Down