From e5eef153690400d0b7a0669d009c13cabb97c4e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Wed, 30 Nov 2022 18:23:22 +0100 Subject: [PATCH 1/6] Extract common code into `MatchData#byte_range` helper --- src/regex/match_data.cr | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr index 085c829073e8..4b2d9164d43d 100644 --- a/src/regex/match_data.cr +++ b/src/regex/match_data.cr @@ -109,10 +109,7 @@ class Regex # ``` def byte_begin(n = 0) : Int32 check_index_out_of_bounds n - n += size if n < 0 - value = @ovector[n * 2] - raise_capture_group_was_not_matched(n) if value < 0 - value + byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }.begin end # Returns the position of the next byte after the match. @@ -132,10 +129,17 @@ class Regex # ``` def byte_end(n = 0) : Int32 check_index_out_of_bounds n + byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }.end + end + + private def byte_range(n, &) n += size if n < 0 - value = @ovector[n * 2 + 1] - raise_capture_group_was_not_matched(n) if value < 0 - value + range = Range.new(@ovector[n * 2], @ovector[n * 2 + 1], exclusive: true) + if range.begin < 0 || range.end < 0 + yield n + else + range + end end # Returns the match of the *n*th capture group, or `nil` if there isn't @@ -151,11 +155,8 @@ class Regex def []?(n : Int) : String? return unless valid_group?(n) - n += size if n < 0 - start = @ovector[n * 2] - finish = @ovector[n * 2 + 1] - return if start < 0 - @string.byte_slice(start, finish - start) + range = byte_range(n) { return nil } + @string.byte_slice(range.begin, range.end - range.begin) end # Returns the match of the *n*th capture group, or raises an `IndexError` @@ -167,11 +168,9 @@ class Regex # ``` def [](n : Int) : String check_index_out_of_bounds n - n += size if n < 0 - value = self[n]? 
- raise_capture_group_was_not_matched n if value.nil? - value + range = byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) } + @string.byte_slice(range.begin, range.end - range.begin) end # Returns the match of the capture group named by *group_name*, or From 4897e150403effe0ac927bd57467d481c564f19d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Wed, 30 Nov 2022 23:18:53 +0100 Subject: [PATCH 2/6] Extract common code into `MatchData#fetch_impl` helper --- src/regex/match_data.cr | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr index 4b2d9164d43d..a3cda23f18a4 100644 --- a/src/regex/match_data.cr +++ b/src/regex/match_data.cr @@ -188,16 +188,7 @@ class Regex # "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"]? # => "al" # ``` def []?(group_name : String) : String? - max_start = -1 - match = nil - named_capture_number(group_name) do |n| - start = @ovector[n * 2] - if start > max_start - max_start = start - match = self[n]? - end - end - match + fetch_impl(group_name) { nil } end # Returns the match of the capture group named by *group_name*, or @@ -215,14 +206,32 @@ class Regex # "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"] # => "al" # ``` def [](group_name : String) : String - match = self[group_name]? - unless match - named_capture_number(group_name) do + fetch_impl(group_name) { |exists| + if exists raise KeyError.new("Capture group '#{group_name}' was not matched") + else + raise KeyError.new("Capture group '#{group_name}' does not exist") + end + } + end + + private def fetch_impl(group_name : String) + max_start = -1 + match = nil + exists = false + named_capture_number(group_name) do |n| + exists = true + start = byte_range(n) { nil }.try(&.begin) || next + if start > max_start + max_start = start + match = self[n]?
end - raise KeyError.new("Capture group '#{group_name}' does not exist") end - match + if match + match + else + yield exists + end end # Returns all matches that are within the given range. From 3ed9d7f2e016923ac7ccce8d16eaec5c525eaf10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Sat, 26 Nov 2022 00:50:50 +0100 Subject: [PATCH 3/6] Extract engine details into `Regex::Engine` --- src/regex.cr | 90 ++++++++------------------------------------- src/regex/engine.cr | 4 ++ src/regex/pcre.cr | 88 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 75 deletions(-) create mode 100644 src/regex/engine.cr create mode 100644 src/regex/pcre.cr diff --git a/src/regex.cr b/src/regex.cr index b3ccd7bf0bec..136671e2f65c 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -1,4 +1,5 @@ -require "./regex/*" +require "./regex/engine" +require "./regex/match_data" # A `Regex` represents a regular expression, a pattern that describes the # contents of strings. A `Regex` can determine whether or not a string matches @@ -195,6 +196,8 @@ require "./regex/*" # `Hash` of `String` => `Int32`, and therefore requires named capture groups to have # unique names within a single `Regex`. class Regex + include Regex::Engine + # List of metacharacters that need to be escaped. # # See `Regex.needs_escape?` and `Regex.escape`. @@ -253,28 +256,8 @@ class Regex # options = Regex::Options::IGNORE_CASE | Regex::Options::EXTENDED # Regex.new("dog", options) # => /dog/ix # ``` - def initialize(source : String, @options : Options = Options::None) - # PCRE's pattern must have their null characters escaped - source = source.gsub('\u{0}', "\\0") - @source = source - - @re = LibPCRE.compile(@source, (options | Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES | Options::UCP), out errptr, out erroffset, nil) - raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null? 
- @extra = LibPCRE.study(@re, LibPCRE::STUDY_JIT_COMPILE, out studyerrptr) - if @extra.null? && studyerrptr - {% unless flag?(:interpreted) %} - LibPCRE.free.call @re.as(Void*) - {% end %} - raise ArgumentError.new("#{String.new(studyerrptr)}") - end - LibPCRE.full_info(@re, nil, LibPCRE::INFO_CAPTURECOUNT, out @captures) - end - - def finalize - LibPCRE.free_study @extra - {% unless flag?(:interpreted) %} - LibPCRE.free.call @re.as(Void*) - {% end %} + def self.new(source : String, options : Options = Options::None) + new(_source: source, _options: options) end # Determines Regex's source validity. If it is, `nil` is returned. @@ -285,15 +268,7 @@ class Regex # Regex.error?("(foo|bar") # => "missing ) at 8" # ``` def self.error?(source) : String? - re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil) - if re - {% unless flag?(:interpreted) %} - LibPCRE.free.call re.as(Void*) - {% end %} - nil - else - "#{String.new(errptr)} at #{erroffset}" - end + Engine.error_impl(source) end # Returns `true` if *char* need to be escaped, `false` otherwise. @@ -485,12 +460,10 @@ class Regex # ``` def match(str, pos = 0, options = Regex::Options::None) : MatchData? if byte_index = str.char_index_to_byte_index(pos) - match = match_at_byte_index(str, byte_index, options) + $~ = match_at_byte_index(str, byte_index, options) else - match = nil + $~ = nil end - - $~ = match end # Match at byte index. Matches a regular expression against `String` @@ -504,17 +477,11 @@ class Regex # /(.)(.)/.match_at_byte_index("クリスタル", 3).try &.[2] # => "ス" # ``` def match_at_byte_index(str, byte_index = 0, options = Regex::Options::None) : MatchData? 
- return ($~ = nil) if byte_index > str.bytesize - - ovector_size = (@captures + 1) * 3 - ovector = Pointer(Int32).malloc(ovector_size) - if internal_matches?(str, byte_index, options, ovector, ovector_size) - match = MatchData.new(self, @re, str, byte_index, ovector, @captures) + if byte_index > str.bytesize + $~ = nil else - match = nil + $~ = match_impl(str, byte_index, options) end - - $~ = match end # Match at character index. It behaves like `#match`, however it returns `Bool` value. @@ -540,14 +507,7 @@ class Regex def matches_at_byte_index?(str, byte_index = 0, options = Regex::Options::None) : Bool return false if byte_index > str.bytesize - internal_matches?(str, byte_index, options, nil, 0) - end - - # Calls `pcre_exec` C function, and handles returning value. - private def internal_matches?(str, byte_index, options, ovector, ovector_size) - ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, (options | Options::NO_UTF8_CHECK), ovector, ovector_size) - # TODO: when `ret < -1`, it means PCRE error. It should handle correctly. 
- ret >= 0 + matches_impl(str, byte_index, options) end # Returns a `Hash` where the values are the names of capture groups and the @@ -561,26 +521,7 @@ class Regex # /(.)(?<foo>.)(.)(?<bar>.)(.)/.name_table # => {4 => "bar", 2 => "foo"} # ``` def name_table : Hash(Int32, String) - LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMECOUNT, out name_count) - LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMEENTRYSIZE, out name_entry_size) - table_pointer = Pointer(UInt8).null - LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMETABLE, pointerof(table_pointer).as(Pointer(Int32))) - name_table = table_pointer.to_slice(name_entry_size*name_count) - - lookup = Hash(Int32, String).new - - name_count.times do |i| - capture_offset = i * name_entry_size - capture_number = ((name_table[capture_offset].to_u16 << 8)).to_i32 | name_table[capture_offset + 1] - - name_offset = capture_offset + 2 - checked = name_table[name_offset, name_entry_size - 3] - name = String.new(checked.to_unsafe) - - lookup[capture_number] = name - end - - lookup + name_table_impl end # Returns the number of (named & non-named) capture groups. @@ -592,8 +533,7 @@ class Regex # /(.)|(.)/.capture_count # => 2 # ``` def capture_count : Int32 - LibPCRE.full_info(@re, @extra, LibPCRE::INFO_CAPTURECOUNT, out capture_count) - capture_count + capture_count_impl end # Convert to `String` in subpattern format.
Produces a `String` which can be diff --git a/src/regex/engine.cr b/src/regex/engine.cr new file mode 100644 index 000000000000..ad69e5d034bf --- /dev/null +++ b/src/regex/engine.cr @@ -0,0 +1,4 @@ +require "./pcre" + +# :nodoc: +alias Regex::Engine = PCRE diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr new file mode 100644 index 000000000000..400106e57102 --- /dev/null +++ b/src/regex/pcre.cr @@ -0,0 +1,88 @@ +require "./lib_pcre" + +# :nodoc: +module Regex::PCRE + # :nodoc: + def initialize(*, _source source, _options @options) + # PCRE's pattern must have their null characters escaped + source = source.gsub('\u{0}', "\\0") + @source = source + + @re = LibPCRE.compile(@source, (options | ::Regex::Options::UTF_8 | ::Regex::Options::NO_UTF8_CHECK | ::Regex::Options::DUPNAMES | ::Regex::Options::UCP), out errptr, out erroffset, nil) + raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null? + @extra = LibPCRE.study(@re, LibPCRE::STUDY_JIT_COMPILE, out studyerrptr) + if @extra.null? 
&& studyerrptr + {% unless flag?(:interpreted) %} + LibPCRE.free.call @re.as(Void*) + {% end %} + raise ArgumentError.new("#{String.new(studyerrptr)}") + end + LibPCRE.full_info(@re, nil, LibPCRE::INFO_CAPTURECOUNT, out @captures) + end + + def finalize + LibPCRE.free_study @extra + {% unless flag?(:interpreted) %} + LibPCRE.free.call @re.as(Void*) + {% end %} + end + + protected def self.error_impl(source) + re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil) + if re + {% unless flag?(:interpreted) %} + LibPCRE.free.call re.as(Void*) + {% end %} + nil + else + "#{String.new(errptr)} at #{erroffset}" + end + end + + private def name_table_impl + LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMECOUNT, out name_count) + LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMEENTRYSIZE, out name_entry_size) + table_pointer = Pointer(UInt8).null + LibPCRE.full_info(@re, @extra, LibPCRE::INFO_NAMETABLE, pointerof(table_pointer).as(Pointer(Int32))) + name_table = table_pointer.to_slice(name_entry_size*name_count) + + lookup = Hash(Int32, String).new + + name_count.times do |i| + capture_offset = i * name_entry_size + capture_number = ((name_table[capture_offset].to_u16 << 8)).to_i32 | name_table[capture_offset + 1] + + name_offset = capture_offset + 2 + checked = name_table[name_offset, name_entry_size - 3] + name = String.new(checked.to_unsafe) + + lookup[capture_number] = name + end + + lookup + end + + private def capture_count_impl + LibPCRE.full_info(@re, @extra, LibPCRE::INFO_CAPTURECOUNT, out capture_count) + capture_count + end + + private def match_impl(str, byte_index, options) + ovector_size = (@captures + 1) * 3 + ovector = Pointer(Int32).malloc(ovector_size) + if internal_matches?(str, byte_index, options, ovector, ovector_size) + MatchData.new(self, @re, str, byte_index, ovector, @captures) + end + end + + private def matches_impl(str, byte_index, options) + internal_matches?(str, byte_index, 
options, nil, 0) + end + + # Calls `pcre_exec` C function, and handles returning value. + private def internal_matches?(str, byte_index, options, ovector, ovector_size) + ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, (options | ::Regex::Options::NO_UTF8_CHECK), ovector, ovector_size) + # TODO: when `ret < -1`, it means PCRE error. It should handle correctly. + ret >= 0 + end +end From 38d614df5a27a4d6369221b2e73823419cefe6e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Sat, 26 Nov 2022 01:07:41 +0100 Subject: [PATCH 4/6] Extract implementation details from `Regex::Options` --- src/regex/lib_pcre.cr | 10 ++++++++++ src/regex/pcre.cr | 26 +++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/regex/lib_pcre.cr b/src/regex/lib_pcre.cr index 2182153870f0..cf32142c5358 100644 --- a/src/regex/lib_pcre.cr +++ b/src/regex/lib_pcre.cr @@ -2,6 +2,16 @@ lib LibPCRE alias Int = LibC::Int + CASELESS = 0x00000001 + MULTILINE = 0x00000002 + DOTALL = 0x00000004 + EXTENDED = 0x00000008 + ANCHORED = 0x00000010 + UTF8 = 0x00000800 + NO_UTF8_CHECK = 0x00002000 + DUPNAMES = 0x00080000 + UCP = 0x20000000 + type Pcre = Void* type PcreExtra = Void* fun compile = pcre_compile(pattern : UInt8*, options : Int, errptr : UInt8**, erroffset : Int*, tableptr : Void*) : Pcre diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index 400106e57102..0147381ec281 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -8,7 +8,7 @@ module Regex::PCRE source = source.gsub('\u{0}', "\\0") @source = source - @re = LibPCRE.compile(@source, (options | ::Regex::Options::UTF_8 | ::Regex::Options::NO_UTF8_CHECK | ::Regex::Options::DUPNAMES | ::Regex::Options::UCP), out errptr, out erroffset, nil) + @re = LibPCRE.compile(@source, pcre_options(options) | LibPCRE::UTF8 | LibPCRE::NO_UTF8_CHECK | LibPCRE::DUPNAMES | LibPCRE::UCP, out errptr, out erroffset, nil) raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if 
@re.null? @extra = LibPCRE.study(@re, LibPCRE::STUDY_JIT_COMPILE, out studyerrptr) if @extra.null? && studyerrptr @@ -20,6 +20,26 @@ module Regex::PCRE LibPCRE.full_info(@re, nil, LibPCRE::INFO_CAPTURECOUNT, out @captures) end + private def pcre_options(options) + flag = 0 + options.each do |option| + flag |= case option + when .ignore_case? then LibPCRE::CASELESS + when .multiline? then LibPCRE::DOTALL | LibPCRE::MULTILINE + when .extended? then LibPCRE::EXTENDED + when .anchored? then LibPCRE::ANCHORED + when .utf_8? then LibPCRE::UTF8 + when .no_utf8_check? then LibPCRE::NO_UTF8_CHECK + when .dupnames? then LibPCRE::DUPNAMES + when .ucp? then LibPCRE::UCP + else + # Unnamed values are explicitly used PCRE options, just pass them through: + option.value + end + end + flag + end + def finalize LibPCRE.free_study @extra {% unless flag?(:interpreted) %} @@ -28,7 +48,7 @@ module Regex::PCRE end protected def self.error_impl(source) - re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil) + re = LibPCRE.compile(source, LibPCRE::UTF8 | LibPCRE::NO_UTF8_CHECK | LibPCRE::DUPNAMES, out errptr, out erroffset, nil) if re {% unless flag?(:interpreted) %} LibPCRE.free.call re.as(Void*) @@ -81,7 +101,7 @@ module Regex::PCRE # Calls `pcre_exec` C function, and handles returning value. private def internal_matches?(str, byte_index, options, ovector, ovector_size) - ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, (options | ::Regex::Options::NO_UTF8_CHECK), ovector, ovector_size) + ret = LibPCRE.exec(@re, @extra, str, str.bytesize, byte_index, pcre_options(options) | LibPCRE::NO_UTF8_CHECK, ovector, ovector_size) # TODO: when `ret < -1`, it means PCRE error. It should handle correctly. 
ret >= 0 end From 2827c9c2af4b123730ab8e4ce5dbc1a6f04d420f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Mon, 28 Nov 2022 13:24:18 +0100 Subject: [PATCH 5/6] Extract implementation details from `Regex::MatchData` --- src/regex/match_data.cr | 49 ++------------------------------------- src/regex/pcre.cr | 51 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 48 deletions(-) diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr index a3cda23f18a4..949bce29e603 100644 --- a/src/regex/match_data.cr +++ b/src/regex/match_data.cr @@ -16,6 +16,8 @@ class Regex # starting from `1`, so that `0` can be used to refer to the entire regular # expression without needing to capture it explicitly. struct MatchData + include Engine::MatchData + # Returns the original regular expression. # # ``` @@ -39,10 +41,6 @@ class Regex # ``` getter string : String - # :nodoc: - def initialize(@regex : Regex, @code : LibPCRE::Pcre, @string : String, @pos : Int32, @ovector : Int32*, @group_size : Int32) - end - # Returns the number of elements in this match object. # # ``` @@ -132,16 +130,6 @@ class Regex byte_range(n) { |normalized_n| raise_capture_group_was_not_matched(normalized_n) }.end end - private def byte_range(n, &) - n += size if n < 0 - range = Range.new(@ovector[n * 2], @ovector[n * 2 + 1], exclusive: true) - if range.begin < 0 || range.end < 0 - yield n - else - range - end - end - # Returns the match of the *n*th capture group, or `nil` if there isn't # an *n*th capture group. # @@ -215,25 +203,6 @@ class Regex } end - private def fetch_impl(group_name : String) - max_start = -1 - match = nil - exists = false - named_capture_number(group_name) do |n| - exists = true - start = byte_range(n) { nil }.try(&.begin) || next - if start > max_start - max_start = start - match = self[n]? - end - end - if match - match - else - yield exists - end - end - # Returns all matches that are within the given range. 
def [](range : Range) : Array(String) self[*Indexable.range_to_index_and_count(range, size) || raise IndexError.new] @@ -257,20 +226,6 @@ class Regex Array(String).new(count) { |i| self[start + i] } end - private def named_capture_number(group_name) - name_entry_size = LibPCRE.get_stringtable_entries(@code, group_name, out first, out last) - return if name_entry_size < 0 - - while first <= last - capture_number = (first[0].to_u16 << 8) | first[1].to_u16 - yield capture_number - - first += name_entry_size - end - - nil - end - # Returns the part of the original string before the match. If the match # starts at the start of the string, returns the empty string. # diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index 0147381ec281..189e6cc1756a 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -91,7 +91,7 @@ module Regex::PCRE ovector_size = (@captures + 1) * 3 ovector = Pointer(Int32).malloc(ovector_size) if internal_matches?(str, byte_index, options, ovector, ovector_size) - MatchData.new(self, @re, str, byte_index, ovector, @captures) + Regex::MatchData.new(self, @re, str, byte_index, ovector, @captures) end end @@ -105,4 +105,53 @@ module Regex::PCRE # TODO: when `ret < -1`, it means PCRE error. It should handle correctly. ret >= 0 end + + module MatchData + # :nodoc: + def initialize(@regex : ::Regex, @code : LibPCRE::Pcre, @string : String, @pos : Int32, @ovector : Int32*, @group_size : Int32) + end + + private def byte_range(n, &) + n += size if n < 0 + range = Range.new(@ovector[n * 2], @ovector[n * 2 + 1], exclusive: true) + if range.begin < 0 || range.end < 0 + yield n + else + range + end + end + + private def fetch_impl(group_name : String) + max_start = -1 + match = nil + exists = false + each_named_capture_number(group_name) do |n| + exists = true + start = byte_range(n) { nil }.try(&.begin) || next + if start > max_start + max_start = start + match = self[n]? 
+ end + end + if match + match + else + yield exists + end + end + + private def each_named_capture_number(group_name) + name_entry_size = LibPCRE.get_stringtable_entries(@code, group_name, out first, out last) + return if name_entry_size < 0 + + while first <= last + capture_number = (first[0].to_u16 << 8) | first[1].to_u16 + yield capture_number + + first += name_entry_size + end + + nil + end + end end From 1e2557ae9906b8205fa9561739b2c151fb8df167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Mon, 12 Dec 2022 20:28:32 +0100 Subject: [PATCH 6/6] Make `Regex::PCRE#initialize` private --- src/regex/pcre.cr | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index 189e6cc1756a..ff68509bed9e 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -2,8 +2,7 @@ require "./lib_pcre" # :nodoc: module Regex::PCRE - # :nodoc: - def initialize(*, _source source, _options @options) + private def initialize(*, _source source, _options @options) # PCRE's pattern must have their null characters escaped source = source.gsub('\u{0}', "\\0") @source = source