From 4025c5267cc12de01575dae3a3f170c63c8c0bd9 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Sat, 9 Nov 2019 21:08:36 +0100
Subject: [PATCH 01/10] add missing documentation to String methods

---
 src/string.cr | 222 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 204 insertions(+), 18 deletions(-)

diff --git a/src/string.cr b/src/string.cr
index 673bd0cbe3dc..dac1b98bf53f 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -725,10 +725,12 @@ class String
     end
   end
 
-  # Returns the `Char` at the given *index*, or raises `IndexError` if out of bounds.
+  # Returns the `Char` at the given *index*.
   #
   # Negative indices can be used to start counting from the end of the string.
   #
+  # Raises `IndexError` if the *index* is out of range.
+  #
   # ```
   # "hello"[0]  # => 'h'
   # "hello"[1]  # => 'e'
@@ -849,11 +851,32 @@ class String
     self[regex, group]?.not_nil!
   end
 
-  def char_at(index : Int)
+  # Returns the `Char` at the given *index*.
+  #
+  # Negative indices can be used to start counting from the end of the string.
+  #
+  # Raises `IndexError` if the *index* is out of range.
+  #
+  # ```
+  # "hello".char_at(0)  # => 'h'
+  # "hello".char_at(1)  # => 'e'
+  # "hello".char_at(-1) # => 'o'
+  # "hello".char_at(-2) # => 'l'
+  # "hello".char_at(5)  # raises IndexError
+  # ```
+  def char_at(index : Int) : Char
     char_at(index) { raise IndexError.new }
   end
 
-  def char_at(index : Int)
+  # Returns the `Char` at the given *index*, or yields if out of bounds.
+  #
+  # Negative indices can be used to start counting from the end of the string.
+  #
+  # ```
+  # "hello".char_at(4) { 'x' } # => 'o'
+  # "hello".char_at(5) { 'x' } # => 'x'
+  # ```
+  def char_at(index : Int, &)
     if ascii_only?
       byte = byte_at?(index)
       if byte
@@ -874,11 +897,42 @@ class String
     end
   end
 
-  def byte_slice(start : Int, count : Int)
+  # Returns a new string consisted of *count* bytes starting at *start* byte.
+  #
+  # The *start* argument can be negative to start counting
+  # from the end of the string.
+  # If `count` is bigger than number of bytes from *start* to `bytelen`,
+  # only remaining bytes are returned.
+  #
+  # Be careful when working with multibyte characters - they can be splitted,
+  # which may lead to invalid UTF-8 values. These,
+  # when asked as chars, will use the unicode replacement �.
+  #
+  # Raises `IndexError` if the *start* index is out of range.
+  #
+  # Raises `ArgumentError` if *count* is negative.
+  #
+  # ```
+  # "hello".byte_slice(0, 2)   # => "he"
+  # "hello".byte_slice(0, 100) # => "hello"
+  # "hello".byte_slice(-2, 3)  # => "lo"
+  # "hello".byte_slice(-2, 5)  # => "lo"
+  # "hello".byte_slice(-2, 5)  # => "he"
+  # "¥hello".byte_slice(0, 2)  # => "¥"
+  # "¥hello".byte_slice(2, 2)  # => "he"
+  # "¥hello".byte_slice(0, 1)  # => "�"
+  # "¥hello".byte_slice(1, 1)  # => "�"
+  # "¥hello".byte_slice(1, 2)  # => "�h"
+  # "hello".byte_slice(6, 2)   # raises IndexError
+  # "hello".byte_slice(-6, 2)  # raises IndexError
+  # "hello".byte_slice(0, -2)  # raises ArgumentError
+  # ```
+  def byte_slice(start : Int, count : Int) : String
     byte_slice?(start, count) || raise IndexError.new
   end
 
-  def byte_slice?(start : Int, count : Int)
+  # Like `byte_slice(Int, Int)` but returns `Nil` if the *start* index is out of range.
+  def byte_slice?(start : Int, count : Int) : String | Nil
     raise ArgumentError.new "Negative count" if count < 0
 
     start += bytesize if start < 0
@@ -903,19 +957,77 @@ class String
     byte_slice start, bytesize - start
   end
 
-  def codepoint_at(index)
+  # Returns a substring starting from the *start* byte.
+  #
+  # The *start* argument can be negative to start counting
+  # from the end of the string.
+  #
+  # Be careful when working with multibyte characters - they can be splitted
+  # which may lead to unexpected result.
+  #
+  # Raises `IndexError` if *start* index is out of range.
+  #
+  # ```
+  # "hello".byte_slice(0)  # => "hello"
+  # "hello".byte_slice(2)  # => "llo"
+  # "hello".byte_slice(-2) # => "lo"
+  # "¥hello".byte_slice(2) # => "hello"
+  # "¥hello".byte_slice(1) # => "�hello"
+  # "hello".byte_slice(6)  # raises IndexError
+  # "hello".byte_slice(-6) # raises IndexError
+  # ```
+  # Returns the codepoint of `Char` at the given *index*.
+  #
+  # Raises `IndexError` if the *index* is out of range.
+  #
+  # See also: `Char#ord`.
+  #
+  # ```
+  # "hello".codepoint_at(0)  # => 104
+  # "hello".codepoint_at(-1) # => 111
+  # "hello".codepoint_at(5)  # raises IndexError
+  # ```
+  def codepoint_at(index) : Int32
     char_at(index).ord
   end
 
-  def byte_at(index)
+  # Returns the byte at the given *index*.
+  #
+  # Raises `IndexError` if the *index* is out of range.
+  #
+  # ```
+  # "¥hello".byte_at(0)  # => 194
+  # "¥hello".byte_at(1)  # => 165
+  # "¥hello".byte_at(2)  # => 104
+  # "¥hello".byte_at(-1) # => 111
+  # "¥hello".byte_at(6)  # => 111
+  # "¥hello".byte_at(7)  # raises IndexError
+  # ```
+  def byte_at(index) : UInt8
     byte_at(index) { raise IndexError.new }
   end
 
-  def byte_at?(index)
+  # Returns the byte at the given *index*, or nil if out of bounds.
+  #
+  # ```
+  # "¥hello".byte_at(0)  # => 194
+  # "¥hello".byte_at(1)  # => 165
+  # "¥hello".byte_at(2)  # => 104
+  # "¥hello".byte_at(-1) # => 111
+  # "¥hello".byte_at(6)  # => 111
+  # "¥hello".byte_at(7)  # => nil
+  # ```
+  def byte_at?(index) : UInt8 | Nil
     byte_at(index) { nil }
   end
 
-  def byte_at(index)
+  # Returns the byte at the given *index*, or yield if out of bounds.
+  #
+  # ```
+  # "¥hello".byte_at(6) { 0 } # => 111
+  # "¥hello".byte_at(7) { 0 } # => 0
+  # ```
+  def byte_at(index, &)
     index += bytesize if index < 0
     if 0 <= index < bytesize
       to_unsafe[index]
@@ -2418,7 +2530,10 @@ class String
     self if !blank?
   end
 
-  def ==(other : self)
+  # Returns `true` if this string is the same as other.
+  # Comparison is done byte-per-byte: if a byte is less then the other corresponding
+  # byte, `false` is returned and so on.
+  def ==(other : self) : Bool
     return true if same?(other)
     return false unless bytesize == other.bytesize
     to_unsafe.memcmp(other.to_unsafe, bytesize) == 0
@@ -2935,6 +3050,15 @@ class String
     {pre, mid, post}
   end
 
+  # Returns the index of *byte* in the string, or `nil` if the byte is not present.
+  # If *offset* is present, it defines the position to start the search.
+  #
+  # ```
+  # "Hello, World".byte_index(0x6f)    # => 4
+  # "Hello, World".byte_index(0x5a)    # => nil
+  # "Hello, World".byte_index(0x6f, 5) # => 8
+  # "💣".byte_index(0xA3)               # => 3
+  # ```
   def byte_index(byte : Int, offset = 0)
     offset.upto(bytesize - 1) do |i|
       if to_unsafe[i] == byte
@@ -2944,6 +3068,12 @@ class String
     nil
   end
 
+  # Returns the byte index of *search* in the string, or `nil` if the string is not present.
+  # If *offset* is present, it defines the position to start the search.
+  #
+  # ```
+  # "¥hello".byte_index("hello") # => 2
+  # ```
   def byte_index(search : String, offset = 0)
     offset += bytesize if offset < 0
     return if offset < 0
@@ -4219,12 +4349,25 @@ class String
     io << '}' if char.ord > 0xFFFF
   end
 
-  def starts_with?(str : String)
+  # Returns true if this string starts with the given *str*, otherwise `false`.
+  #
+  # ```
+  # "hello".starts_with?("h")  # => true
+  # "hello".starts_with?("he") # => true
+  # "hello".starts_with?("hu") # => false
+  # ```
+  def starts_with?(str : String) : Bool
     return false if str.bytesize > bytesize
     to_unsafe.memcmp(str.to_unsafe, str.bytesize) == 0
   end
 
-  def starts_with?(char : Char)
+  # Returns `true` if this string starts with the given *char*, otherwise `false`.
+  #
+  # ```
+  # "hello".starts_with?('h') # => true
+  # "hello".starts_with?('e') # => false
+  # ```
+  def starts_with?(char : Char) : Bool
     each_char do |c|
       return c == char
     end
@@ -4232,16 +4375,39 @@ class String
     false
   end
 
-  def starts_with?(re : Regex)
+  # Returns true if this string starts with the given *re* regular expression, otherwise `false`.
+  #
+  # ```
+  # "22hello".starts_with?(/[0-9]/) # => true
+  # "22hello".starts_with?(/[a-z]/) # => false
+  # "h22".starts_with?(/[a-z]/)     # => true
+  # "h22".starts_with?(/[A-Z]/)     # => false
+  # "h22".starts_with?(/[a-z]{2}/)  # => false
+  # "hh22".starts_with?(/[a-z]{2}/) # => true
+  # ```
+  def starts_with?(re : Regex) : Bool
     !!($~ = re.match_at_byte_index(self, 0, Regex::Options::ANCHORED))
   end
 
-  def ends_with?(str : String)
+  # Returns true if this string ends with the given *str*, otherwise `false`.
+  #
+  # ```
+  # "hello".ends_with?("o")  # => true
+  # "hello".ends_with?("lo") # => true
+  # "hello".ends_with?("ll") # => false
+  # ```
+  def ends_with?(str : String) : Bool
     return false if str.bytesize > bytesize
     (to_unsafe + bytesize - str.bytesize).memcmp(str.to_unsafe, str.bytesize) == 0
   end
 
-  def ends_with?(char : Char)
+  # Returns true if this string ends with the given *char*, otherwise `false`.
+  #
+  # ```
+  # "hello".ends_with?('o') # => true
+  # "hello".ends_with?('l') # => false
+  # ```
+  def ends_with?(char : Char) : Bool
     return false unless bytesize > 0
 
     if char.ascii? || ascii_only?
@@ -4258,7 +4424,17 @@ class String
     true
   end
 
-  def ends_with?(re : Regex)
+  # Returns true if this string ends with the given *re* regular expression, otherwise `false`.
+  #
+  # ```
+  # "22hello".ends_with?(/[0-9]/) # => false
+  # "22hello".ends_with?(/[a-z]/) # => true
+  # "22h".ends_with?(/[a-z]/)     # => true
+  # "22h".ends_with?(/[A-Z]/)     # => false
+  # "22h".ends_with?(/[a-z]{2}/)  # => false
+  # "22hh".ends_with?(/[a-z]{2}/) # => true
+  # ```
+  def ends_with?(re : Regex) : Bool
     !!($~ = /#{re}\z/.match(self))
   end
 
@@ -4397,18 +4573,22 @@ class String
     char_index
   end
 
-  def clone
+  # Returns `self`
+  def clone : String
     self
   end
 
-  def dup
+  # Returns `self`
+  def dup : String
     self
   end
 
+  # Returns `self`
   def to_s : String
     self
   end
 
+  # Appends `self` characters to the given IO object.
   def to_s(io : IO) : Nil
     io.write_utf8(to_slice)
   end
@@ -4425,10 +4605,16 @@ class String
     pointerof(@c)
   end
 
+  # Returns *count* of underlying bytes of this String starting at given *byte_offset* in an **unsafe** way.
+  #
+  # The returned slice is read-only.
   def unsafe_byte_slice(byte_offset, count)
     Slice.new(to_unsafe + byte_offset, count, read_only: true)
   end
 
+  # Returns the underlying bytes of this String starting at given *byte_offset* in an **unsafe** way.
+  #
+  # The returned slice is read-only.
   def unsafe_byte_slice(byte_offset)
     Slice.new(to_unsafe + byte_offset, bytesize - byte_offset, read_only: true)
   end

From 4bfd993c3039fca23176896eac6e21688ce2d8a0 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Sat, 9 Nov 2019 21:12:23 +0100
Subject: [PATCH 02/10] change byte_slice(Int) ArgumentError->IndexError when
 out of range

---
 spec/std/string_spec.cr | 12 ++++++++++++
 src/string.cr           |  6 ++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
index 8ace564ab67f..ececca3c7766 100644
--- a/spec/std/string_spec.cr
+++ b/spec/std/string_spec.cr
@@ -229,6 +229,18 @@ describe "String" do
     it "gets byte_slice with negative index" do
       "hello".byte_slice(-2, 3).should eq("lo")
     end
+
+    it "gets byte_slice(Int) with with start out of bounds" do
+      expect_raises(IndexError) do
+        "hello".byte_slice(10)
+      end
+    end
+
+    it "gets byte_slice(Int) with with start out of bounds" do
+      expect_raises(IndexError) do
+        "hello".byte_slice(-10)
+      end
+    end
   end
 
   describe "to_i" do
diff --git a/src/string.cr b/src/string.cr
index dac1b98bf53f..7dd2e1b18c86 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -953,8 +953,10 @@ class String
     end
   end
 
-  def byte_slice(start : Int)
-    byte_slice start, bytesize - start
+  def byte_slice(start : Int) : String
+    count = bytesize - start
+    raise IndexError.new if start > 0 && count < 0
+    byte_slice start, count
   end
 
   # Returns a substring starting from the *start* byte.

From 352373be4c8d6b9abf859c2e44d0e17cf6e6c218 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Sat, 16 Nov 2019 22:51:26 +0100
Subject: [PATCH 03/10] add missing documentation to String methods #2

---
 src/string.cr | 120 +++++++++++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 49 deletions(-)

diff --git a/src/string.cr b/src/string.cr
index 7dd2e1b18c86..adb6dad27d45 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -729,7 +729,7 @@ class String
   #
   # Negative indices can be used to start counting from the end of the string.
   #
-  # Raises `IndexError` if the *index* is out of range.
+  # Raises `IndexError` if the *index* is out of bounds.
   #
   # ```
   # "hello"[0]  # => 'h'
@@ -746,7 +746,7 @@ class String
   # as character indices. Indices can be negative to start
   # counting from the end of the string.
   #
-  # Raises `IndexError` if the range's start is out of range.
+  # Raises `IndexError` if the range's start is out of bounds.
   #
   # ```
   # "hello"[0..2]   # => "hel"
@@ -759,7 +759,7 @@ class String
     self[*Indexable.range_to_index_and_count(range, size)]
   end
 
-  # Like `#[Range]`, but returns `nil` if the range's start is out of range.
+  # Like `#[Range]`, but returns `nil` if the range's start is out of bounds.
   #
   # ```
   # "hello"[6..7]? # => nil
@@ -771,17 +771,17 @@ class String
 
   # Returns a substring starting from the *start* character of size *count*.
   #
-  # The *start* argument can be negative to start counting
+  # *start* can be negative to start counting
   # from the end of the string.
   #
-  # Raises `IndexError` if the *start* index is out of range.
+  # Raises `IndexError` if the *start* index is out of bounds.
   #
   # Raises `ArgumentError` if *count* is negative.
   def [](start : Int, count : Int)
     self[start, count]? || raise IndexError.new
   end
 
-  # Like `#[Int, Int]` but returns `nil` if the *start* index is out of range.
+  # Like `#[Int, Int]` but returns `nil` if the *start* index is out of bounds.
   def []?(start : Int, count : Int)
     raise ArgumentError.new "Negative count: #{count}" if count < 0
     return byte_slice?(start, count) if ascii_only?
@@ -855,7 +855,7 @@ class String
   #
   # Negative indices can be used to start counting from the end of the string.
   #
-  # Raises `IndexError` if the *index* is out of range.
+  # Raises `IndexError` if the *index* is out of bounds.
   #
   # ```
   # "hello".char_at(0)  # => 'h'
@@ -873,8 +873,11 @@ class String
   # Negative indices can be used to start counting from the end of the string.
   #
   # ```
-  # "hello".char_at(4) { 'x' } # => 'o'
-  # "hello".char_at(5) { 'x' } # => 'x'
+  # "hello".char_at(4) { 'x' }  # => 'o'
+  # "hello".char_at(5) { 'x' }  # => 'x'
+  # "hello".char_at(-1) { 'x' } # => 'o'
+  # "hello".char_at(-5) { 'x' } # => 'h'
+  # "hello".char_at(-6) { 'x' } # => 'x'
   # ```
   def char_at(index : Int, &)
     if ascii_only?
@@ -897,18 +900,18 @@ class String
     end
   end
 
-  # Returns a new string consisted of *count* bytes starting at *start* byte.
+  # Returns a new string built from *count* bytes starting at *start* byte.
   #
-  # The *start* argument can be negative to start counting
+  # *start* can be negative to start counting
   # from the end of the string.
-  # If `count` is bigger than number of bytes from *start* to `bytelen`,
+  # If *count* is bigger than the number of bytes from *start* to `#bytesize`,
   # only remaining bytes are returned.
   #
   # Be careful when working with multibyte characters - they can be splitted,
   # which may lead to invalid UTF-8 values. These,
   # when asked as chars, will use the unicode replacement �.
   #
-  # Raises `IndexError` if the *start* index is out of range.
+  # Raises `IndexError` if the *start* index is out of bounds.
   #
   # Raises `ArgumentError` if *count* is negative.
   #
@@ -931,7 +934,17 @@ class String
     byte_slice?(start, count) || raise IndexError.new
   end
 
-  # Like `byte_slice(Int, Int)` but returns `Nil` if the *start* index is out of range.
+  # Like `byte_slice(Int, Int)` but returns `Nil` if the *start* index is out of bounds.
+  #
+  # Raises `ArgumentError` if *count* is negative.
+  #
+  # ```
+  # "hello".byte_slice(0, 2)   # => "he"
+  # "hello".byte_slice(0, 100) # => "hello"
+  # "hello".byte_slice(6, 2)   # => nil
+  # "hello".byte_slice(-6, 2)  # => nil
+  # "hello".byte_slice(0, -2)  # raises ArgumentError
+  # ```
   def byte_slice?(start : Int, count : Int) : String | Nil
     raise ArgumentError.new "Negative count" if count < 0
 
@@ -953,21 +966,15 @@ class String
     end
   end
 
-  def byte_slice(start : Int) : String
-    count = bytesize - start
-    raise IndexError.new if start > 0 && count < 0
-    byte_slice start, count
-  end
-
   # Returns a substring starting from the *start* byte.
   #
-  # The *start* argument can be negative to start counting
+  # *start* can can be negative to start counting
   # from the end of the string.
   #
   # Be careful when working with multibyte characters - they can be splitted
   # which may lead to unexpected result.
   #
-  # Raises `IndexError` if *start* index is out of range.
+  # Raises `IndexError` if *start* index is out of bounds.
   #
   # ```
   # "hello".byte_slice(0)  # => "hello"
@@ -978,9 +985,17 @@ class String
   # "hello".byte_slice(6)  # raises IndexError
   # "hello".byte_slice(-6) # raises IndexError
   # ```
-  # Returns the codepoint of `Char` at the given *index*.
+  def byte_slice(start : Int) : String
+    count = bytesize - start
+    raise IndexError.new if start > 0 && count < 0
+    byte_slice start, count
+  end
+
+  # Returns the codepoint of the character at the given *index*.
+  #
+  # Negative indices can be used to start counting from the end of the string.
   #
-  # Raises `IndexError` if the *index* is out of range.
+  # Raises `IndexError` if the *index* is out of bounds.
   #
   # See also: `Char#ord`.
   #
@@ -995,7 +1010,7 @@ class String
 
   # Returns the byte at the given *index*.
   #
-  # Raises `IndexError` if the *index* is out of range.
+  # Raises `IndexError` if the *index* is out of bounds.
   #
   # ```
   # "¥hello".byte_at(0)  # => 194
@@ -1009,7 +1024,7 @@ class String
     byte_at(index) { raise IndexError.new }
   end
 
-  # Returns the byte at the given *index*, or nil if out of bounds.
+  # Returns the byte at the given *index*, or `nil` if out of bounds.
   #
   # ```
   # "¥hello".byte_at(0)  # => 194
@@ -1023,11 +1038,11 @@ class String
     byte_at(index) { nil }
   end
 
-  # Returns the byte at the given *index*, or yield if out of bounds.
+  # Returns the byte at the given *index*, or yields if out of bounds.
   #
   # ```
-  # "¥hello".byte_at(6) { 0 } # => 111
-  # "¥hello".byte_at(7) { 0 } # => 0
+  # "¥hello".byte_at(6) { "OUT OF BOUNDS" } # => 111
+  # "¥hello".byte_at(7) { "OUT OF BOUNDS" } # => "OUT OF BOUNDS"
   # ```
   def byte_at(index, &)
     index += bytesize if index < 0
@@ -2532,8 +2547,8 @@ class String
     self if !blank?
   end
 
-  # Returns `true` if this string is the same as other.
-  # Comparison is done byte-per-byte: if a byte is less then the other corresponding
+  # Returns `true` if this string is equal to `*other*.
+  # Comparison is done byte-per-byte: if a byte is different from the corresponding
   # byte, `false` is returned and so on.
   def ==(other : self) : Bool
     return true if same?(other)
@@ -3061,7 +3076,7 @@ class String
   # "Hello, World".byte_index(0x6f, 5) # => 8
   # "💣".byte_index(0xA3)               # => 3
   # ```
-  def byte_index(byte : Int, offset = 0)
+  def byte_index(byte : Int, offset = 0) : Int32?
     offset.upto(bytesize - 1) do |i|
       if to_unsafe[i] == byte
         return i
@@ -3073,10 +3088,17 @@ class String
   # Returns the byte index of *search* in the string, or `nil` if the string is not present.
   # If *offset* is present, it defines the position to start the search.
   #
+  # # Negative *offset* can be used to start the search from the end of the string.
+  #
   # ```
-  # "¥hello".byte_index("hello") # => 2
+  # "¥hello".byte_index("hello")              # => 2
+  # "hello".byte_index("world")               # => nil
+  # "Dizzy Miss Lizzy".byte_index("izzy")     # => 1
+  # "Dizzy Miss Lizzy".byte_index("izzy", 2)  # => 12
+  # "Dizzy Miss Lizzy".byte_index("izzy", -4) # => 12
+  # "Dizzy Miss Lizzy".byte_index("izzy", -3) # => nil
   # ```
-  def byte_index(search : String, offset = 0)
+  def byte_index(search : String, offset = 0) : Int32?
     offset += bytesize if offset < 0
     return if offset < 0
 
@@ -4351,7 +4373,7 @@ class String
     io << '}' if char.ord > 0xFFFF
   end
 
-  # Returns true if this string starts with the given *str*, otherwise `false`.
+  # Returns `true` if this string starts with the given *str*.
   #
   # ```
   # "hello".starts_with?("h")  # => true
@@ -4363,7 +4385,7 @@ class String
     to_unsafe.memcmp(str.to_unsafe, str.bytesize) == 0
   end
 
-  # Returns `true` if this string starts with the given *char*, otherwise `false`.
+  # Returns `true` if this string starts with the given *char*.
   #
   # ```
   # "hello".starts_with?('h') # => true
@@ -4377,7 +4399,7 @@ class String
     false
   end
 
-  # Returns true if this string starts with the given *re* regular expression, otherwise `false`.
+  # Returns `true` if the regular expression *re* matches at the start of this string.
   #
   # ```
   # "22hello".starts_with?(/[0-9]/) # => true
@@ -4391,7 +4413,7 @@ class String
     !!($~ = re.match_at_byte_index(self, 0, Regex::Options::ANCHORED))
   end
 
-  # Returns true if this string ends with the given *str*, otherwise `false`.
+  # Returns `true` if this string ends with the given *str*.
   #
   # ```
   # "hello".ends_with?("o")  # => true
@@ -4403,7 +4425,7 @@ class String
     (to_unsafe + bytesize - str.bytesize).memcmp(str.to_unsafe, str.bytesize) == 0
   end
 
-  # Returns true if this string ends with the given *char*, otherwise `false`.
+  # Returns `true` if this string ends with the given *char*.
   #
   # ```
   # "hello".ends_with?('o') # => true
@@ -4426,7 +4448,7 @@ class String
     true
   end
 
-  # Returns true if this string ends with the given *re* regular expression, otherwise `false`.
+  # Returns `true` if the regular expression *re* matches at the end of this string.
   #
   # ```
   # "22hello".ends_with?(/[0-9]/) # => false
@@ -4575,27 +4597,27 @@ class String
     char_index
   end
 
-  # Returns `self`
+  # Returns `self`.
   def clone : String
     self
   end
 
-  # Returns `self`
+  # ditto
   def dup : String
     self
   end
 
-  # Returns `self`
+  # ditto
   def to_s : String
     self
   end
 
-  # Appends `self` characters to the given IO object.
+  # Appends `self` to *io*.
   def to_s(io : IO) : Nil
     io.write_utf8(to_slice)
   end
 
-  # Returns the underlying bytes of this String in an **unsafe** way.
+  # Returns the underlying bytes of this String.
   #
   # The returned slice is read-only.
   def to_slice : Bytes
@@ -4607,17 +4629,17 @@ class String
     pointerof(@c)
   end
 
-  # Returns *count* of underlying bytes of this String starting at given *byte_offset* in an **unsafe** way.
+  # Returns *count* of underlying bytes of this String starting at given *byte_offset*.
   #
   # The returned slice is read-only.
-  def unsafe_byte_slice(byte_offset, count)
+  def unsafe_byte_slice(byte_offset, count) : Slice
     Slice.new(to_unsafe + byte_offset, count, read_only: true)
   end
 
-  # Returns the underlying bytes of this String starting at given *byte_offset* in an **unsafe** way.
+  # Returns the underlying bytes of this String starting at given *byte_offset*.
   #
   # The returned slice is read-only.
-  def unsafe_byte_slice(byte_offset)
+  def unsafe_byte_slice(byte_offset) : Slice
     Slice.new(to_unsafe + byte_offset, bytesize - byte_offset, read_only: true)
   end
 

From d18213f1a09113ad5d305b32d954384095bba734 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@gmail.com>
Date: Sun, 17 Nov 2019 20:56:44 +0100
Subject: [PATCH 04/10] byte_index doc typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Johannes Müller <johannes.mueller@smj-fulda.org>
---
 src/string.cr | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/string.cr b/src/string.cr
index adb6dad27d45..5b7b56932714 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -3088,7 +3088,7 @@ class String
   # Returns the byte index of *search* in the string, or `nil` if the string is not present.
   # If *offset* is present, it defines the position to start the search.
   #
-  # # Negative *offset* can be used to start the search from the end of the string.
+  # Negative *offset* can be used to start the search from the end of the string.
   #
   # ```
   # "¥hello".byte_index("hello")              # => 2

From dd46819294d78badd9082a65603778e3211d1d3e Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Sun, 17 Nov 2019 21:16:00 +0100
Subject: [PATCH 05/10] change ditto to :ditto: in string doc

Signed-off-by: Jan Zajic <jan.zajic@corpus.cz>
---
 src/string.cr | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/string.cr b/src/string.cr
index 5b7b56932714..3a4cf399af01 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -4602,12 +4602,12 @@ class String
     self
   end
 
-  # ditto
+  # :ditto:
   def dup : String
     self
   end
 
-  # ditto
+  # :ditto:
   def to_s : String
     self
   end

From 456ba9ebb0642ad49b620f574d158f782634e3e9 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Thu, 21 Nov 2019 16:23:08 +0100
Subject: [PATCH 06/10] validate offset in String#byte_index(Int,offset) and
 make negative offset possible

---
 spec/std/string_spec.cr |  8 ++++++++
 src/string.cr           | 17 +++++++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
index ececca3c7766..0ade01d2fee6 100644
--- a/spec/std/string_spec.cr
+++ b/spec/std/string_spec.cr
@@ -932,6 +932,14 @@ describe "String" do
     it { "foo".byte_index('o'.ord).should eq(1) }
     it { "foo bar booz".byte_index('o'.ord, 3).should eq(9) }
     it { "foo".byte_index('a'.ord).should be_nil }
+    it { "foo".byte_index('a'.ord).should be_nil }
+    it { "foo".byte_index('o'.ord, 3).should be_nil }
+    it {
+      "Dizzy Miss Lizzy".byte_index('z'.ord).should eq(2)
+      "Dizzy Miss Lizzy".byte_index('z'.ord, 3).should eq(3)
+      "Dizzy Miss Lizzy".byte_index('z'.ord, -4).should eq(13)
+      "Dizzy Miss Lizzy".byte_index('z'.ord, -17).should be_nil
+    }
 
     it "gets byte index of string" do
       "hello world".byte_index("he").should eq(0)
diff --git a/src/string.cr b/src/string.cr
index 3a4cf399af01..01acb2f44858 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -3070,13 +3070,22 @@ class String
   # Returns the index of *byte* in the string, or `nil` if the byte is not present.
   # If *offset* is present, it defines the position to start the search.
   #
+  # Negative *offset* can be used to start the search from the end of the string.
+  #
   # ```
-  # "Hello, World".byte_index(0x6f)    # => 4
-  # "Hello, World".byte_index(0x5a)    # => nil
-  # "Hello, World".byte_index(0x6f, 5) # => 8
-  # "💣".byte_index(0xA3)               # => 3
+  # "Hello, World".byte_index(0x6f)             # => 4
+  # "Hello, World".byte_index(0x5a)             # => nil
+  # "Hello, World".byte_index(0x6f, 5)          # => 8
+  # "💣".byte_index(0xA3)                        # => 3
+  # "Dizzy Miss Lizzy".byte_index('z'.ord)      # => 2
+  # "Dizzy Miss Lizzy".byte_index('z'.ord, 3)   # => 3
+  # "Dizzy Miss Lizzy".byte_index('z'.ord, -4)  # => 13
+  # "Dizzy Miss Lizzy".byte_index('z'.ord, -17) # => nil
   # ```
   def byte_index(byte : Int, offset = 0) : Int32?
+    offset += bytesize if offset < 0
+    return if offset < 0
+
     offset.upto(bytesize - 1) do |i|
       if to_unsafe[i] == byte
         return i

From 01788598ec8a7e1ac68da47760193d16fc910574 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Mon, 25 Nov 2019 22:20:41 +0100
Subject: [PATCH 07/10] Document String byte_slice risks

---
 src/string.cr | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/string.cr b/src/string.cr
index 01acb2f44858..974e58d9ae04 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -907,9 +907,10 @@ class String
   # If *count* is bigger than the number of bytes from *start* to `#bytesize`,
   # only remaining bytes are returned.
   #
-  # Be careful when working with multibyte characters - they can be splitted,
-  # which may lead to invalid UTF-8 values. These,
-  # when asked as chars, will use the unicode replacement �.
+  # This method should be avoided,
+  # unless the string is proven to be ASCII-only (for example `#ascii_only?`),
+  # or the byte positions are known to be at character boundaries.
+  # Otherwise the characters are splitted, which leads to invalid UTF-8 values.
   #
   # Raises `IndexError` if the *start* index is out of bounds.
   #

From 43a330b6dbba112d6255ccab6644e70f5c395248 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Fri, 3 Jan 2020 14:30:26 +0100
Subject: [PATCH 08/10] #8447 requested changes

---
 spec/std/string_spec.cr |  5 +----
 src/string.cr           | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
index 0ade01d2fee6..a0ff535f3924 100644
--- a/spec/std/string_spec.cr
+++ b/spec/std/string_spec.cr
@@ -230,13 +230,10 @@ describe "String" do
       "hello".byte_slice(-2, 3).should eq("lo")
     end
 
-    it "gets byte_slice(Int) with with start out of bounds" do
+    it "gets byte_slice(Int) with start out of bounds" do
       expect_raises(IndexError) do
         "hello".byte_slice(10)
       end
-    end
-
-    it "gets byte_slice(Int) with with start out of bounds" do
       expect_raises(IndexError) do
         "hello".byte_slice(-10)
       end
diff --git a/src/string.cr b/src/string.cr
index 974e58d9ae04..6e1e227522f4 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -868,7 +868,7 @@ class String
     char_at(index) { raise IndexError.new }
   end
 
-  # Returns the `Char` at the given *index*, or yields if out of bounds.
+  # Returns the `Char` at the given *index*, or result of running the given block if out of bounds.
   #
   # Negative indices can be used to start counting from the end of the string.
   #
@@ -910,7 +910,7 @@ class String
   # This method should be avoided,
   # unless the string is proven to be ASCII-only (for example `#ascii_only?`),
   # or the byte positions are known to be at character boundaries.
-  # Otherwise the characters are splitted, which leads to invalid UTF-8 values.
+  # Otherwise, multi-byte characters may be split, leading to an invalid UTF-8 encoding.
   #
   # Raises `IndexError` if the *start* index is out of bounds.
   #
@@ -924,9 +924,9 @@ class String
   # "hello".byte_slice(-2, 5)  # => "he"
   # "¥hello".byte_slice(0, 2)  # => "¥"
   # "¥hello".byte_slice(2, 2)  # => "he"
-  # "¥hello".byte_slice(0, 1)  # => "�"
-  # "¥hello".byte_slice(1, 1)  # => "�"
-  # "¥hello".byte_slice(1, 2)  # => "�h"
+  # "¥hello".byte_slice(0, 1)  # => "�" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1, 1)  # => "�" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1, 2)  # => "�h" (invalid UTF-8 character)
   # "hello".byte_slice(6, 2)   # raises IndexError
   # "hello".byte_slice(-6, 2)  # raises IndexError
   # "hello".byte_slice(0, -2)  # raises ArgumentError
@@ -972,8 +972,10 @@ class String
   # *start* can can be negative to start counting
   # from the end of the string.
   #
-  # Be careful when working with multibyte characters - they can be splitted
-  # which may lead to unexpected result.
+  # This method should be avoided,
+  # unless the string is proven to be ASCII-only (for example `#ascii_only?`),
+  # or the byte positions are known to be at character boundaries.
+  # Otherwise, multi-byte characters may be split, leading to an invalid UTF-8 encoding.
   #
   # Raises `IndexError` if *start* index is out of bounds.
   #
@@ -982,7 +984,7 @@ class String
   # "hello".byte_slice(2)  # => "llo"
   # "hello".byte_slice(-2) # => "lo"
   # "¥hello".byte_slice(2) # => "hello"
-  # "¥hello".byte_slice(1) # => "�hello"
+  # "¥hello".byte_slice(1) # => "�hello" (invalid UTF-8 character)
   # "hello".byte_slice(6)  # raises IndexError
   # "hello".byte_slice(-6) # raises IndexError
   # ```
@@ -2551,6 +2553,8 @@ class String
   # Returns `true` if this string is equal to `*other*.
   # Comparison is done byte-per-byte: if a byte is different from the corresponding
   # byte, `false` is returned and so on.
+  #
+  # See `#compare` for more comparison options.
   def ==(other : self) : Bool
     return true if same?(other)
     return false unless bytesize == other.bytesize

From 2620493a24a7101312b17c9f96563527915bff40 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Mon, 6 Jan 2020 17:05:39 +0100
Subject: [PATCH 09/10] typo in byte_slice?, byte_at? doc

---
 src/string.cr | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/string.cr b/src/string.cr
index 6e1e227522f4..5300887d9a94 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -940,11 +940,11 @@ class String
   # Raises `ArgumentError` if *count* is negative.
   #
   # ```
-  # "hello".byte_slice(0, 2)   # => "he"
-  # "hello".byte_slice(0, 100) # => "hello"
-  # "hello".byte_slice(6, 2)   # => nil
-  # "hello".byte_slice(-6, 2)  # => nil
-  # "hello".byte_slice(0, -2)  # raises ArgumentError
+  # "hello".byte_slice?(0, 2)   # => "he"
+  # "hello".byte_slice?(0, 100) # => "hello"
+  # "hello".byte_slice?(6, 2)   # => nil
+  # "hello".byte_slice?(-6, 2)  # => nil
+  # "hello".byte_slice?(0, -2)  # raises ArgumentError
   # ```
   def byte_slice?(start : Int, count : Int) : String | Nil
     raise ArgumentError.new "Negative count" if count < 0
@@ -1030,12 +1030,12 @@ class String
   # Returns the byte at the given *index*, or `nil` if out of bounds.
   #
   # ```
-  # "¥hello".byte_at(0)  # => 194
-  # "¥hello".byte_at(1)  # => 165
-  # "¥hello".byte_at(2)  # => 104
-  # "¥hello".byte_at(-1) # => 111
-  # "¥hello".byte_at(6)  # => 111
-  # "¥hello".byte_at(7)  # => nil
+  # "¥hello".byte_at?(0)  # => 194
+  # "¥hello".byte_at?(1)  # => 165
+  # "¥hello".byte_at?(2)  # => 104
+  # "¥hello".byte_at?(-1) # => 111
+  # "¥hello".byte_at?(6)  # => 111
+  # "¥hello".byte_at?(7)  # => nil
   # ```
   def byte_at?(index) : UInt8 | Nil
     byte_at(index) { nil }

From 1d8df36adcd57ec74af0cca0e303bfd55e046fc4 Mon Sep 17 00:00:00 2001
From: Jan Zajic <jan.zajic@corpus.cz>
Date: Thu, 9 Apr 2020 10:08:03 +0200
Subject: [PATCH 10/10] requested change of index/byte_index doc

---
 src/string.cr | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/string.cr b/src/string.cr
index 5300887d9a94..b810daa6e298 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -2761,7 +2761,7 @@ class String
     {% end %}
   end
 
-  # Returns the index of *search* in the string, or `nil` if the string is not present.
+  # Returns the index of the _first_ occurrence of *search* in the string, or `nil` if not present.
   # If *offset* is present, it defines the position to start the search.
   #
   # ```
@@ -3072,7 +3072,7 @@ class String
     {pre, mid, post}
   end
 
-  # Returns the index of *byte* in the string, or `nil` if the byte is not present.
+  # Returns the index of the _first_ occurrence of *byte* in the string, or `nil` if not present.
   # If *offset* is present, it defines the position to start the search.
   #
   # Negative *offset* can be used to start the search from the end of the string.