From e5eef153690400d0b7a0669d009c13cabb97c4e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?= <straightshoota@gmail.com>
Date: Wed, 30 Nov 2022 18:23:22 +0100
Subject: [PATCH 1/6] Extract common code into `MatchData#byte_range` helper

---
 src/regex/match_data.cr | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)
diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr
index 085c829073e8..4b2d9164d43d 100644
--- a/src/regex/match_data.cr
+++ b/src/regex/match_data.cr
@@ -109,10 +109,7 @@ class Regex
     # ```
     def byte_begin(n = 0) : Int32
       check_index_out_of_bounds n
-      n += size if n < 0
-      value = @ovector[n * 2]
-      raise_capture_group_was_not_matched(n) if value < 0
-      value
+      byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }.begin
     end
 
     # Returns the position of the next byte after the match.
@@ -132,10 +129,17 @@ class Regex
     # ```
     def byte_end(n = 0) : Int32
       check_index_out_of_bounds n
+      byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }.end
+    end
+
+    private def byte_range(n, &)
       n += size if n < 0
-      value = @ovector[n * 2 + 1]
-      raise_capture_group_was_not_matched(n) if value < 0
-      value
+      range = Range.new(@ovector[n * 2], @ovector[n * 2 + 1], exclusive: true)
+      if range.begin < 0 || range.end < 0
+        yield n
+      else
+        range
+      end
     end
 
     # Returns the match of the *n*th capture group, or `nil` if there isn't
@@ -151,11 +155,8 @@ class Regex
     def []?(n : Int) : String?
       return unless valid_group?(n)
 
-      n += size if n < 0
-      start = @ovector[n * 2]
-      finish = @ovector[n * 2 + 1]
-      return if start < 0
-      @string.byte_slice(start, finish - start)
+      range = byte_range(n) { return nil }
+      @string.byte_slice(range.begin, range.end - range.begin)
     end
 
     # Returns the match of the *n*th capture group, or raises an `IndexError`
@@ -167,11 +168,9 @@ class Regex
     # ```
     def [](n : Int) : String
       check_index_out_of_bounds n
-      n += size if n < 0
 
-      value = self[n]?
-      raise_capture_group_was_not_matched n if value.nil?
-      value
+      range = byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }
+      @string.byte_slice(range.begin, range.end - range.begin)
     end
 
     # Returns the match of the capture group named by *group_name*, or

From 4897e150403effe0ac927bd57467d481c564f19d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?= <straightshoota@gmail.com>
Date: Wed, 30 Nov 2022 23:18:53 +0100
Subject: [PATCH 2/6] Extract common code into `MatchData#fetch_impl` helper

---
 src/regex/match_data.cr | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr
index 4b2d9164d43d..a3cda23f18a4 100644
--- a/src/regex/match_data.cr
+++ b/src/regex/match_data.cr
@@ -188,16 +188,7 @@ class Regex
     # "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"]? # => "al"
     # ```
     def []?(group_name : String) : String?
-      max_start = -1
-      match = nil
-      named_capture_number(group_name) do |n|
-        start = @ovector[n * 2]
-        if start > max_start
-          max_start = start
-          match = self[n]?
-        end
-      end
-      match
+      fetch_impl(group_name) { nil }
     end
 
     # Returns the match of the capture group named by *group_name*, or
@@ -215,14 +206,32 @@ class Regex
     # "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"] # => "al"
     # ```
     def [](group_name : String) : String
-      match = self[group_name]?
-      unless match
-        named_capture_number(group_name) do
+      fetch_impl(group_name) { |exists|
+        if exists
           raise KeyError.new("Capture group '#{group_name}' was not matched")
+        else
+          raise KeyError.new("Capture group '#{group_name}' does not exist")
+        end
+      }
+    end
+
+    private def fetch_impl(group_name : String)
+      max_start = -1
+      match = nil
+      exists = false
+      named_capture_number(group_name) do |n|
+        exists = true
+        start = byte_range(n) { nil }.try(&.begin) || next
+        if start > max_start
+          max_start = start
+          match = self[n]?
         end
-        raise KeyError.new("Capture group '#{group_name}' does not exist")
       end
-      match
+      if match
+        match
+      else
+        yield exists
+      end
     end
 
     # Returns all matches that are within the given range.

From 3ed9d7f2e016923ac7ccce8d16eaec5c525eaf10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?= <straightshoota@gmail.com>
Date: Sat, 26 Nov 2022 00:50:50 +0100
Subject: [PATCH 3/6] Extract engine details into `Regex::Engine`

---
 src/regex.cr        | 90 ++++++++-------------------------------------
 src/regex/engine.cr |  4 ++
 src/regex/pcre.cr   | 88 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 75 deletions(-)
 create mode 100644 src/regex/engine.cr
 create mode 100644 src/regex/pcre.cr

diff --git a/src/regex.cr b/src/regex.cr
index b3ccd7bf0bec..136671e2f65c 100644
--- a/src/regex.cr
+++ b/src/regex.cr
@@ -1,4 +1,5 @@
-require "./regex/*"
+require "./regex/engine"
+require "./regex/match_data"
 
 # A `Regex` represents a regular expression, a pattern that describes the
 # contents of strings. A `Regex` can determine whether or not a string matches
@@ -195,6 +196,8 @@ require "./regex/*"
 # `Hash` of `String` => `Int32`, and therefore requires named capture groups to have
 # unique names within a single `Regex`.
 class Regex
+  include Regex::Engine
+
   # List of metacharacters that need to be escaped.
   #
   # See `Regex.needs_escape?` and `Regex.escape`.
@@ -253,28 +256,8 @@ class Regex
   # options = Regex::Options::IGNORE_CASE | Regex::Options::EXTENDED
   # Regex.new("dog", options) # => /dog/ix
   # ```
-  def initialize(source : String, @options : Options = Options::None)
-    # PCRE's pattern must have their null characters escaped
-    source = source.gsub('\u{0}', "\\0")
-    @source = source
-
-    @re = LibPCRE.compile(@source, (options | Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES | Options::UCP), out errptr, out erroffset, nil)
-    raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null?
-    @extra = LibPCRE.study(@re, LibPCRE::STUDY_JIT_COMPILE, out studyerrptr)
-    if @extra.null? && studyerrptr
-      {% unless flag?(:interpreted) %}
-        LibPCRE.free.call @re.as(Void*)
-      {% end %}
-      raise ArgumentError.new("#{String.new(studyerrptr)}")
-    end
-    LibPCRE.full_info(@re, nil, LibPCRE::INFO_CAPTURECOUNT, out @captures)
-  end
-
-  def finalize
-    LibPCRE.free_study @extra
-    {% unless flag?(:interpreted) %}
-      LibPCRE.free.call @re.as(Void*)
-    {% end %}
+  def self.new(source : String, options : Options = Options::None)
+    new(_source: source, _options: options)
   end
 
   # Determines Regex's source validity. If it is, `nil` is returned.
@@ -285,15 +268,7 @@ class Regex
   # Regex.error?("(foo|bar")  # => "missing ) at 8"
   # ```
   def self.error?(source) : String?
-    re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil)
-    if re
-      {% unless flag?(:interpreted) %}
-        LibPCRE.free.call re.as(Void*)
-      {% end %}
-      nil
-    else
-      "#{String.new(errptr)} at #{erroffset}"
-    end
+    Engine.error_impl(source)
   end
 
   # Returns `true` if *char* need to be escaped, `false` otherwise.
@@ -485,12 +460,10 @@ class Regex
   # ```
   def match(str, pos = 0, options = Regex::Options::None) : MatchData?
     if byte_index = str.char_index_to_byte_index(pos)
-      match = match_at_byte_index(str, byte_index, options)
+      $~ = match_at_byte_index(str, byte_index, options)
     else
-      match = nil
+      $~ = nil
     end
-
-    $~ = match
   end
 
   # Match at byte index. Matches a regular expression against `String`
@@ -504,17 +477,11 @@ class Regex
   # /(.)(.)/.match_at_byte_index("クリスタル", 3).try &.[2] # => "ス"
   # ```
   def match_at_byte_index(str, byte_index = 0, options = Regex::Options::None) : MatchData?
-    return ($~ = nil) if byte_index > str.bytesize
-
-    ovector_size = (@captures + 1) * 3
-    ovector = Pointer(Int32).malloc(ovector_size)
-    if internal_matches?(str, byte_index, options, ovector, ovector_size)
-      match = MatchData.new(self, @re, str, byte_index, ovector, @captures)
+    if byte_index > str.bytesize
+      $~ = nil
     else
-      match = nil
+      $~ = match_impl(str, byte_index, options)
     end
-
-    $~ = match
   end
 
   # Match at character index. It behaves like `#match`, however it returns `Bool` value.
@@ -540,14 +507,7 @@ class Regex
   def matches_at_byte_index?(str, byte_index = 0, options = Regex::Options::None) : Bool
     return false if byte_index > str.bytesize
 
-    internal_matches?(str, byte_index, options, nil, 0)
-  end
-
-  # Calls `pcre_exec` C function, and handles returning value.
-  private def internal_matches?(str, byte_index, options, ovector, ovector_size)
-    ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, (options | Options::NO_UTF8_CHECK), ovector, ovector_size)
-    # TODO: when `ret < -1`, it means PCRE error. It should handle correctly.
-    ret >= 0
+    matches_impl(str, byte_index, options)
   end
 
   # Returns a `Hash` where the values are the names of capture groups and the
@@ -561,26 +521,7 @@ class Regex
   # /(.)(?<foo>.)(.)(?<bar>.)(.)/.name_table # => {4 => "bar", 2 => "foo"}
   # ```
   def name_table : Hash(Int32, String)
-    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMECOUNT, out name_count)
-    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMEENTRYSIZE, out name_entry_size)
-    table_pointer = Pointer(UInt8).null
-    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMETABLE, pointerof(table_pointer).as(Pointer(Int32)))
-    name_table = table_pointer.to_slice(name_entry_size*name_count)
-
-    lookup = Hash(Int32, String).new
-
-    name_count.times do |i|
-      capture_offset = i * name_entry_size
-      capture_number = ((name_table[capture_offset].to_u16 << 8)).to_i32 | name_table[capture_offset + 1]
-
-      name_offset = capture_offset + 2
-      checked = name_table[name_offset, name_entry_size - 3]
-      name = String.new(checked.to_unsafe)
-
-      lookup[capture_number] = name
-    end
-
-    lookup
+    name_table_impl
   end
 
   # Returns the number of (named & non-named) capture groups.
@@ -592,8 +533,7 @@ class Regex
   # /(.)|(.)/.capture_count    # => 2
   # ```
   def capture_count : Int32
-    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_CAPTURECOUNT, out capture_count)
-    capture_count
+    capture_count_impl
   end
 
   # Convert to `String` in subpattern format. Produces a `String` which can be
diff --git a/src/regex/engine.cr b/src/regex/engine.cr
new file mode 100644
index 000000000000..ad69e5d034bf
--- /dev/null
+++ b/src/regex/engine.cr
@@ -0,0 +1,4 @@
+require "./pcre"
+
+# :nodoc:
+alias Regex::Engine = PCRE
diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr
new file mode 100644
index 000000000000..400106e57102
--- /dev/null
+++ b/src/regex/pcre.cr
@@ -0,0 +1,88 @@
+require "./lib_pcre"
+
+# :nodoc:
+module Regex::PCRE
+  # :nodoc:
+  def initialize(*, _source source, _options @options)
+    # PCRE's pattern must have their null characters escaped
+    source = source.gsub('\u{0}', "\\0")
+    @source = source
+
+    @re = LibPCRE.compile(@source, (options | ::Regex::Options::UTF_8 | ::Regex::Options::NO_UTF8_CHECK | ::Regex::Options::DUPNAMES | ::Regex::Options::UCP), out errptr, out erroffset, nil)
+    raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null?
+    @extra = LibPCRE.study(@re, LibPCRE::STUDY_JIT_COMPILE, out studyerrptr)
+    if @extra.null? && studyerrptr
+      {% unless flag?(:interpreted) %}
+        LibPCRE.free.call @re.as(Void*)
+      {% end %}
+      raise ArgumentError.new("#{String.new(studyerrptr)}")
+    end
+    LibPCRE.full_info(@re, nil, LibPCRE::INFO_CAPTURECOUNT, out @captures)
+  end
+
+  def finalize
+    LibPCRE.free_study @extra
+    {% unless flag?(:interpreted) %}
+      LibPCRE.free.call @re.as(Void*)
+    {% end %}
+  end
+
+  protected def self.error_impl(source)
+    re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil)
+    if re
+      {% unless flag?(:interpreted) %}
+        LibPCRE.free.call re.as(Void*)
+      {% end %}
+      nil
+    else
+      "#{String.new(errptr)} at #{erroffset}"
+    end
+  end
+
+  private def name_table_impl
+    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMECOUNT, out name_count)
+    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMEENTRYSIZE, out name_entry_size)
+    table_pointer = Pointer(UInt8).null
+    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMETABLE, pointerof(table_pointer).as(Pointer(Int32)))
+    name_table = table_pointer.to_slice(name_entry_size*name_count)
+
+    lookup = Hash(Int32, String).new
+
+    name_count.times do |i|
+      capture_offset = i * name_entry_size
+      capture_number = ((name_table[capture_offset].to_u16 << 8)).to_i32 | name_table[capture_offset + 1]
+
+      name_offset = capture_offset + 2
+      checked = name_table[name_offset, name_entry_size - 3]
+      name = String.new(checked.to_unsafe)
+
+      lookup[capture_number] = name
+    end
+
+    lookup
+  end
+
+  private def capture_count_impl
+    LibPCRE.full_info(@re, @extra, LibPCRE::INFO_CAPTURECOUNT, out capture_count)
+    capture_count
+  end
+
+  private def match_impl(str, byte_index, options)
+    ovector_size = (@captures + 1) * 3
+    ovector = Pointer(Int32).malloc(ovector_size)
+    if internal_matches?(str, byte_index, options, ovector, ovector_size)
+      MatchData.new(self, @re, str, byte_index, ovector, @captures)
+    end
+  end
+
+  private def matches_impl(str, byte_index, options)
+    internal_matches?(str, byte_index, options, nil, 0)
+  end
+
+  # Calls `pcre_exec` C function, and handles returning value.
+  private def internal_matches?(str, byte_index, options, ovector, ovector_size)
+    ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, (options | ::Regex::Options::NO_UTF8_CHECK), ovector, ovector_size)
+    # TODO: when `ret < -1`, it means PCRE error. It should handle correctly.
+    ret >= 0
+  end
+end

From 38d614df5a27a4d6369221b2e73823419cefe6e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?= <straightshoota@gmail.com>
Date: Sat, 26 Nov 2022 01:07:41 +0100
Subject: [PATCH 4/6] Extract implementation details from `Regex::Options`

---
 src/regex/lib_pcre.cr | 10 ++++++++++
 src/regex/pcre.cr     | 26 +++++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/regex/lib_pcre.cr b/src/regex/lib_pcre.cr
index 2182153870f0..cf32142c5358 100644
--- a/src/regex/lib_pcre.cr
+++ b/src/regex/lib_pcre.cr
@@ -2,6 +2,16 @@
 lib LibPCRE
   alias Int = LibC::Int
 
+  CASELESS      = 0x00000001
+  MULTILINE     = 0x00000002
+  DOTALL        = 0x00000004
+  EXTENDED      = 0x00000008
+  ANCHORED      = 0x00000010
+  UTF8          = 0x00000800
+  NO_UTF8_CHECK = 0x00002000
+  DUPNAMES      = 0x00080000
+  UCP           = 0x20000000
+
   type Pcre = Void*
   type PcreExtra = Void*
   fun compile = pcre_compile(pattern : UInt8*, options : Int, errptr : UInt8**, erroffset : Int*, tableptr : Void*) : Pcre
diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr
index 400106e57102..0147381ec281 100644
--- a/src/regex/pcre.cr
+++ b/src/regex/pcre.cr
@@ -8,7 +8,7 @@ module Regex::PCRE
     source = source.gsub('\u{0}', "\\0")
     @source = source
 
-    @re = LibPCRE.compile(@source, (options | ::Regex::Options::UTF_8 | ::Regex::Options::NO_UTF8_CHECK | ::Regex::Options::DUPNAMES | ::Regex::Options::UCP), out errptr, out erroffset, nil)
+    @re = LibPCRE.compile(@source, pcre_options(options) | LibPCRE::UTF8 | LibPCRE::NO_UTF8_CHECK | LibPCRE::DUPNAMES | LibPCRE::UCP, out errptr, out erroffset, nil)
     raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null?
     @extra = LibPCRE.study(@re, LibPCRE::STUDY_JIT_COMPILE, out studyerrptr)
     if @extra.null? && studyerrptr
@@ -20,6 +20,26 @@ module Regex::PCRE
     LibPCRE.full_info(@re, nil, LibPCRE::INFO_CAPTURECOUNT, out @captures)
   end
 
+  private def pcre_options(options) # translates Regex::Options into a PCRE option bit mask
+    flag = 0
+    options.each do |option|
+      flag |= case option
+              when .ignore_case?   then LibPCRE::CASELESS
+              when .multiline?     then LibPCRE::DOTALL | LibPCRE::MULTILINE
+              when .extended?      then LibPCRE::EXTENDED
+              when .anchored?      then LibPCRE::ANCHORED
+              when .utf_8?         then LibPCRE::UTF8
+              when .no_utf8_check? then LibPCRE::NO_UTF8_CHECK
+              when .dupnames?      then LibPCRE::DUPNAMES
+              when .ucp?           then LibPCRE::UCP
+              else
+                option.value
+              end
+      options &= ~option # this member is handled; drop it so only unmapped bits remain
+    end
+    flag | options.value # leftover (unnamed) bits are raw PCRE options: pass them through
+  end
+
   def finalize
     LibPCRE.free_study @extra
     {% unless flag?(:interpreted) %}
@@ -28,7 +48,7 @@ module Regex::PCRE
   end
 
   protected def self.error_impl(source)
-    re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil)
+    re = LibPCRE.compile(source, LibPCRE::UTF8 | LibPCRE::NO_UTF8_CHECK | LibPCRE::DUPNAMES, out errptr, out erroffset, nil)
     if re
       {% unless flag?(:interpreted) %}
         LibPCRE.free.call re.as(Void*)
@@ -81,7 +101,7 @@ module Regex::PCRE
 
   # Calls `pcre_exec` C function, and handles returning value.
   private def internal_matches?(str, byte_index, options, ovector, ovector_size)
-    ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, (options | ::Regex::Options::NO_UTF8_CHECK), ovector, ovector_size)
+    ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, pcre_options(options) | LibPCRE::NO_UTF8_CHECK, ovector, ovector_size)
     # TODO: when `ret < -1`, it means PCRE error. It should handle correctly.
     ret >= 0
   end

From 2827c9c2af4b123730ab8e4ce5dbc1a6f04d420f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?= <straightshoota@gmail.com>
Date: Mon, 28 Nov 2022 13:24:18 +0100
Subject: [PATCH 5/6] Extract implementation details from `Regex::MatchData`

---
 src/regex/match_data.cr | 49 ++-------------------------------------
 src/regex/pcre.cr       | 51 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr
index a3cda23f18a4..949bce29e603 100644
--- a/src/regex/match_data.cr
+++ b/src/regex/match_data.cr
@@ -16,6 +16,8 @@ class Regex
   # starting from `1`, so that `0` can be used to refer to the entire regular
   # expression without needing to capture it explicitly.
   struct MatchData
+    include Engine::MatchData
+
     # Returns the original regular expression.
     #
     # ```
@@ -39,10 +41,6 @@ class Regex
     # ```
     getter string : String
 
-    # :nodoc:
-    def initialize(@regex : Regex, @code : LibPCRE::Pcre, @string : String, @pos : Int32, @ovector : Int32*, @group_size : Int32)
-    end
-
     # Returns the number of elements in this match object.
     #
     # ```
@@ -132,16 +130,6 @@ class Regex
       byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }.end
     end
 
-    private def byte_range(n, &)
-      n += size if n < 0
-      range = Range.new(@ovector[n * 2], @ovector[n * 2 + 1], exclusive: true)
-      if range.begin < 0 || range.end < 0
-        yield n
-      else
-        range
-      end
-    end
-
     # Returns the match of the *n*th capture group, or `nil` if there isn't
     # an *n*th capture group.
     #
@@ -215,25 +203,6 @@ class Regex
       }
     end
 
-    private def fetch_impl(group_name : String)
-      max_start = -1
-      match = nil
-      exists = false
-      named_capture_number(group_name) do |n|
-        exists = true
-        start = byte_range(n) { nil }.try(&.begin) || next
-        if start > max_start
-          max_start = start
-          match = self[n]?
-        end
-      end
-      if match
-        match
-      else
-        yield exists
-      end
-    end
-
     # Returns all matches that are within the given range.
     def [](range : Range) : Array(String)
       self[*Indexable.range_to_index_and_count(range, size) || raise IndexError.new]
@@ -257,20 +226,6 @@ class Regex
       Array(String).new(count) { |i| self[start + i] }
     end
 
-    private def named_capture_number(group_name)
-      name_entry_size = LibPCRE.get_stringtable_entries(@code, group_name, out first, out last)
-      return if name_entry_size < 0
-
-      while first <= last
-        capture_number = (first[0].to_u16 << 8) | first[1].to_u16
-        yield capture_number
-
-        first += name_entry_size
-      end
-
-      nil
-    end
-
     # Returns the part of the original string before the match. If the match
     # starts at the start of the string, returns the empty string.
     #
diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr
index 0147381ec281..189e6cc1756a 100644
--- a/src/regex/pcre.cr
+++ b/src/regex/pcre.cr
@@ -91,7 +91,7 @@ module Regex::PCRE
     ovector_size = (@captures + 1) * 3
     ovector = Pointer(Int32).malloc(ovector_size)
     if internal_matches?(str, byte_index, options, ovector, ovector_size)
-      MatchData.new(self, @re, str, byte_index, ovector, @captures)
+      Regex::MatchData.new(self, @re, str, byte_index, ovector, @captures)
     end
   end
 
@@ -105,4 +105,53 @@ module Regex::PCRE
     # TODO: when `ret < -1`, it means PCRE error. It should handle correctly.
     ret >= 0
   end
+
+  module MatchData
+    # :nodoc:
+    def initialize(@regex : ::Regex, @code : LibPCRE::Pcre, @string : String, @pos : Int32, @ovector : Int32*, @group_size : Int32)
+    end
+
+    private def byte_range(n, &)
+      n += size if n < 0
+      range = Range.new(@ovector[n * 2], @ovector[n * 2 + 1], exclusive: true)
+      if range.begin < 0 || range.end < 0
+        yield n
+      else
+        range
+      end
+    end
+
+    private def fetch_impl(group_name : String, &)
+      max_start = -1
+      match = nil
+      exists = false
+      each_named_capture_number(group_name) do |n|
+        exists = true # the name maps to at least one group, matched or not
+        start = byte_range(n) { nil }.try(&.begin) || next
+        if start > max_start # among same-named groups keep the one starting last
+          max_start = start
+          match = self[n]?
+        end
+      end
+      if match
+        match
+      else
+        yield exists # no match: hand control to the caller, telling it if the name exists
+      end
+    end
+
+    private def each_named_capture_number(group_name, &)
+      name_entry_size = LibPCRE.get_stringtable_entries(@code, group_name, out first, out last)
+      return if name_entry_size < 0
+
+      while first <= last
+        capture_number = (first[0].to_u16 << 8) | first[1].to_u16 # first two bytes, high byte first
+        yield capture_number
+
+        first += name_entry_size
+      end
+
+      nil
+    end
+  end
 end

From 1e2557ae9906b8205fa9561739b2c151fb8df167 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20M=C3=BCller?= <straightshoota@gmail.com>
Date: Mon, 12 Dec 2022 20:28:32 +0100
Subject: [PATCH 6/6] Make `Regex::PCRE#initialize` private

---
 src/regex/pcre.cr | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr
index 189e6cc1756a..ff68509bed9e 100644
--- a/src/regex/pcre.cr
+++ b/src/regex/pcre.cr
@@ -2,8 +2,7 @@ require "./lib_pcre"
 
 # :nodoc:
 module Regex::PCRE
-  # :nodoc:
-  def initialize(*, _source source, _options @options)
+  private def initialize(*, _source source, _options @options)
     # PCRE's pattern must have their null characters escaped
     source = source.gsub('\u{0}', "\\0")
     @source = source