diff --git a/spec/std/regex_spec.cr b/spec/std/regex_spec.cr index 5935e85060fd..c38194bc8785 100644 --- a/spec/std/regex_spec.cr +++ b/spec/std/regex_spec.cr @@ -200,16 +200,12 @@ describe "Regex" do /foo/.matches?("foo", options: Regex::Options::ANCHORED).should be_true end - it "doesn't crash with a large single line string" do - {% if Regex::Engine.resolve.name == "Regex::PCRE" %} - LibPCRE.config LibPCRE::CONFIG_JIT, out jit_enabled - pending! "PCRE JIT mode not available." unless 1 == jit_enabled - {% end %} + it "matches a large single line string" do + LibPCRE.config LibPCRE::CONFIG_JIT, out jit_enabled + pending! "PCRE JIT mode not available." unless 1 == jit_enabled str = File.read(datapath("large_single_line_string.txt")) - str.matches?(/^(?:[A-Za-z0-9+\/]{4})*(?:[A-Za-z0-9+\/]{2}==|[A-Za-z0-9+\/]{3}=)?$/) - # We don't care whether this actually matches or not, it's just to make - # sure the engine does not stack overflow with a large string. + str.matches?(/^(?:[A-Za-z0-9+\/]{4})*(?:[A-Za-z0-9+\/]{2}==|[A-Za-z0-9+\/]{3}=)?$/).should be_false end end @@ -426,12 +422,6 @@ describe "Regex" do it ".error?" do Regex.error?("(foo|bar)").should be_nil - Regex.error?("(foo|bar").should eq( - if Regex::Engine.to_s == "Regex::PCRE2" - "missing closing parenthesis at 8" - else - "missing ) at 8" - end - ) + Regex.error?("(foo|bar").should eq "missing ) at 8" end end diff --git a/src/regex/engine.cr b/src/regex/engine.cr index 766917f87dd2..ad69e5d034bf 100644 --- a/src/regex/engine.cr +++ b/src/regex/engine.cr @@ -1,11 +1,4 @@ -{% if flag?(:use_pcre2) || (!flag?(:use_pcre) && !flag?(:win32) && `hash pkg-config 2> /dev/null && pkg-config --silence-errors --modversion libpcre2-8 || printf %s false` != "false") %} - require "./pcre2" +require "./pcre" - # :nodoc: - alias Regex::Engine = PCRE2 -{% else %} - require "./pcre" - - # :nodoc: - alias Regex::Engine = PCRE -{% end %} +# :nodoc: +alias Regex::Engine = PCRE diff --git a/src/regex/lib_pcre2.cr b/src/regex/lib_pcre2.cr deleted file mode 100644 index 922c492b7e1a..000000000000 --- a/src/regex/lib_pcre2.cr +++ /dev/null @@ -1,89 +0,0 @@ -@[Link("pcre2-8")] -lib LibPCRE2 - alias Int = LibC::Int - - UNSET = ~LibC::SizeT.new(0) - - ANCHORED = 0x80000000 - NO_UTF_CHECK = 0x40000000 - ENDANCHORED = 0x20000000 - - ALLOW_EMPTY_CLASS = 0x00000001 - ALT_BSUX = 0x00000002 - AUTO_CALLOUT = 0x00000004 - CASELESS = 0x00000008 - DOLLAR_ENDONLY = 0x00000010 - DOTALL = 0x00000020 - DUPNAMES = 0x00000040 - EXTENDED = 0x00000080 - FIRSTLINE = 0x00000100 - MATCH_UNSET_BACKREF = 0x00000200 - MULTILINE = 0x00000400 - NEVER_UCP = 0x00000800 - NEVER_UTF = 0x00001000 - NO_AUTO_CAPTURE = 0x00002000 - NO_AUTO_POSSESS = 0x00004000 - NO_DOTSTAR_ANCHOR = 0x00008000 - NO_START_OPTIMIZE = 0x00010000 - UCP = 0x00020000 - UNGREEDY = 0x00040000 - UTF = 0x00080000 - NEVER_BACKSLASH_C = 0x00100000 - ALT_CIRCUMFLEX = 0x00200000 - ALT_VERBNAMES = 0x00400000 - USE_OFFSET_LIMIT = 0x00800000 - EXTENDED_MORE = 0x01000000 - LITERAL = 0x02000000 - MATCH_INVALID_UTF = 0x04000000 - - ERROR_NOMATCH = -1 - - INFO_ALLOPTIONS = 0 - INFO_ARGOPTIONS = 1 - INFO_BACKREFMAX = 2 - INFO_BSR = 3 - INFO_CAPTURECOUNT = 4 - INFO_FIRSTCODEUNIT = 5 - INFO_FIRSTCODETYPE = 6 - INFO_FIRSTBITMAP = 7 - INFO_HASCRORLF = 8 - INFO_JCHANGED = 9 - INFO_JITSIZE = 10 - INFO_LASTCODEUNIT = 11 - INFO_LASTCODETYPE = 12 - INFO_MATCHEMPTY = 13 - INFO_MATCHLIMIT = 14 - INFO_MAXLOOKBEHIND = 15 - INFO_MINLENGTH = 16 - INFO_NAMECOUNT = 17 - INFO_NAMEENTRYSIZE = 18 - INFO_NAMETABLE = 19 - INFO_NEWLINE = 20 - INFO_DEPTHLIMIT = 21 - INFO_RECURSIONLIMIT = 21 # Obsolete synonym - INFO_SIZE = 22 - INFO_HASBACKSLASHC = 23 - INFO_FRAMESIZE = 24 - INFO_HEAPLIMIT = 25 - INFO_EXTRAOPTIONS = 26 - - type Code = Void* - type CompileContext = Void* - type MatchData = Void* - - fun get_error_message = pcre2_get_error_message_8(errorcode : Int, buffer : UInt8*, bufflen : LibC::SizeT) : Int - - fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : LibC::SizeT*, erroroffset : Int*, ccontext : CompileContext*) : Code* - fun code_free = pcre2_code_free_8(code : Code*) : Void - - fun pattern_info = pcre2_pattern_info_8(code : Code*, what : UInt32, where : Void*) : Int - - fun match = pcre2_match_8(code : Code*, subject : UInt8*, length : LibC::SizeT, startoffset : LibC::SizeT, options : UInt32, match_data : MatchData*, mcontext : Void*) : Int - fun match_data_create_from_pattern = pcre2_match_data_create_from_pattern_8(code : Code*, gcontext : Void*) : MatchData* - fun match_data_free = pcre2_match_data_free_8(match_data : MatchData*) : Void - - fun substring_nametable_scan = pcre2_substring_nametable_scan_8(code : Code*, name : UInt8*, first : UInt8*, last : UInt8*) : Int - - fun get_ovector_pointer = pcre2_get_ovector_pointer_8(match_data : MatchData*) : LibC::SizeT* - fun get_ovector_count = pcre2_get_ovector_count_8(match_data : MatchData*) : UInt32 -end diff --git a/src/regex/pcre2.cr b/src/regex/pcre2.cr deleted file mode 100644 index 3eea20280268..000000000000 --- a/src/regex/pcre2.cr +++ /dev/null @@ -1,176 +0,0 @@ -require "./lib_pcre2" - -# :nodoc: -module Regex::PCRE2 - @re : LibPCRE2::Code* - - # :nodoc: - def initialize(*, _source @source : String, _options @options) - @re = PCRE2.compile(source, pcre2_options(options) | LibPCRE2::UTF | LibPCRE2::NO_UTF_CHECK | LibPCRE2::DUPNAMES | LibPCRE2::UCP) do |error_message| - raise ArgumentError.new(error_message) - end - end - - protected def self.compile(source, options) - if res = LibPCRE2.compile(source, source.bytesize, options, out errorcode, out erroroffset, nil) - res - else - message = String.new(256) do |buffer| - bytesize = LibPCRE2.get_error_message(errorcode, buffer, 256) - {bytesize, 0} - end - yield "#{message} at #{erroroffset}" - end - end - - private def pcre2_options(options) - flag = 0 - options.each do |option| - flag |= case option - when .ignore_case? then LibPCRE2::CASELESS - when .multiline? then LibPCRE2::DOTALL | LibPCRE2::MULTILINE - when .extended? then LibPCRE2::EXTENDED - when .anchored? then LibPCRE2::ANCHORED - when .utf_8? then LibPCRE2::UTF - when .no_utf8_check? then LibPCRE2::NO_UTF_CHECK - when .dupnames? then LibPCRE2::DUPNAMES - when .ucp? then LibPCRE2::UCP - else - raise "unreachable" - end - end - flag - end - - def finalize - {% unless flag?(:interpreted) %} - LibPCRE2.code_free @re - {% end %} - end - - protected def self.error_impl(source) - code = PCRE2.compile(source, LibPCRE2::UTF | LibPCRE2::NO_UTF_CHECK | LibPCRE2::DUPNAMES | LibPCRE2::UCP) do |error_message| - return error_message - end - - LibPCRE2.code_free code - - nil - end - - private def pattern_info(what) - value = uninitialized UInt32 - pattern_info(what, pointerof(value)) - value - end - - private def pattern_info(what, where) - ret = LibPCRE2.pattern_info(@re, what, where) - if ret != 0 - raise "error pattern_info #{what}: #{ret}" - end - end - - private def name_table_impl - lookup = Hash(Int32, String).new - - each_capture_group do |capture_number, name_entry| - lookup[capture_number] = String.new(name_entry.to_unsafe + 2) - end - - lookup - end - - # :nodoc: - def each_capture_group - name_table = uninitialized UInt8* - pattern_info(LibPCRE2::INFO_NAMETABLE, pointerof(name_table)) - - name_entry_size = pattern_info(LibPCRE2::INFO_NAMEENTRYSIZE) - - name_count = pattern_info(LibPCRE2::INFO_NAMECOUNT) - name_count.times do - capture_number = (name_table[0] << 8) | name_table[1] - - yield capture_number, Slice.new(name_table, name_entry_size) - - name_table += name_entry_size - end - end - - private def capture_count_impl - pattern_info(LibPCRE2::INFO_CAPTURECOUNT).to_i32 - end - - private def match_impl(str, byte_index, options) - match_data = match_data(str, byte_index, options) || return - - ovector = LibPCRE2.get_ovector_pointer(match_data) - ovector_count = LibPCRE2.get_ovector_count(match_data) - LibPCRE2.match_data_free(match_data) - - ::Regex::MatchData.new(self, @re, str, byte_index, ovector, ovector_count.to_i32 - 1) - end - - private def matches_impl(str, byte_index, options) - if match_data = match_data(str, byte_index, options) - LibPCRE2.match_data_free(match_data) - true - else - false - end - end - - private def match_data(str, byte_index, options) - match_data = LibPCRE2.match_data_create_from_pattern(@re, nil) - match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, nil) - - if match_count < 0 - LibPCRE2.match_data_free(match_data) - case match_count - when LibPCRE2::ERROR_NOMATCH - return - else - raise "error!" - end - end - - match_data - end - - module MatchData - # :nodoc: - def initialize(@regex : Regex, @code : LibPCRE2::Code*, @string : String, @pos : Int32, @ovector : UInt64*, @group_size : Int32) - end - - private def byte_range(n, &) - n += size if n < 0 - range = Range.new(@ovector[n * 2].to_i32!, @ovector[n * 2 + 1].to_i32!, exclusive: true) - if range.begin < 0 || range.end < 0 - yield n - else - range - end - end - - private def fetch_impl(group_name : String) - selected_range = nil - exists = false - @regex.each_capture_group do |number, name_entry| - if name_entry[2, group_name.bytesize] == group_name.to_slice - exists = true - range = byte_range(number) { nil } - if (range && selected_range && range.begin > selected_range.begin) || !selected_range - selected_range = range - end - end - end - - if selected_range - @string.byte_slice(selected_range.begin, selected_range.end - selected_range.begin) - else - yield exists - end - end - end -end