crystal-lang · straight-shoota · Dec 23, 2022 · Nov 30, 2022 · Nov 30, 2022 · Nov 30, 2022
diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
@@ -203,9 +203,10 @@ describe "String" do
     end
   end
 
-  describe "byte_slice" do
+  describe "#byte_slice" do
     it "gets byte_slice" do
       "hello".byte_slice(1, 3).should eq("ell")
+      "hello".byte_slice(1..3).should eq("ell")
     end
 
     it "gets byte_slice with negative count" do
@@ -224,14 +225,19 @@ describe "String" do
       expect_raises(IndexError) do
         "hello".byte_slice(10, 3)
       end
+      expect_raises(IndexError) do
+        "hello".byte_slice(10..13)
+      end
     end
 
     it "gets byte_slice with large count" do
       "hello".byte_slice(1, 10).should eq("ello")
+      "hello".byte_slice(1..10).should eq("ello")
     end
 
     it "gets byte_slice with negative index" do
       "hello".byte_slice(-2, 3).should eq("lo")
+      "hello".byte_slice(-2..-1).should eq("lo")
     end
 
     it "gets byte_slice(Int) with start out of bounds" do
@@ -244,6 +250,45 @@ describe "String" do
     end
   end
 
+  describe "#byte_slice?" do
+    it "gets byte_slice" do
+      "hello".byte_slice?(1, 3).should eq("ell")
+      "hello".byte_slice?(1..3).should eq("ell")
+    end
+
+    it "gets byte_slice with negative count" do
+      expect_raises(ArgumentError) do
+        "hello".byte_slice?(1, -10)
+      end
+    end
+
+    it "gets byte_slice with negative count at last" do
+      expect_raises(ArgumentError) do
+        "hello".byte_slice?(5, -1)
+      end
+    end
+
+    it "gets byte_slice with start out of bounds" do
+      "hello".byte_slice?(10, 3).should be_nil
+      "hello".byte_slice?(10..13).should be_nil
+    end
+
+    it "gets byte_slice with large count" do
+      "hello".byte_slice?(1, 10).should eq("ello")
+      "hello".byte_slice?(1..11).should eq("ello")
+    end
+
+    it "gets byte_slice with negative index" do
+      "hello".byte_slice?(-2, 3).should eq("lo")
+      "hello".byte_slice?(-2..-1).should eq("lo")
+    end
+
+    it "gets byte_slice(Int) with start out of bounds" do
+      "hello".byte_slice?(10).should be_nil
+      "hello".byte_slice?(-10).should be_nil
+    end
+  end
+
   describe "to_i" do
     it { "1234".to_i.should eq(1234) }
     it { "-128".to_i8.should eq(-128) }

diff --git a/src/string.cr b/src/string.cr
@@ -1173,9 +1173,9 @@ class String
   # "hello".byte_slice(-2, 5)  # => "he"
   # "¥hello".byte_slice(0, 2)  # => "¥"
   # "¥hello".byte_slice(2, 2)  # => "he"
-  # "¥hello".byte_slice(0, 1)  # => "�" (invalid UTF-8 character)
-  # "¥hello".byte_slice(1, 1)  # => "�" (invalid UTF-8 character)
-  # "¥hello".byte_slice(1, 2)  # => "�h" (invalid UTF-8 character)
+  # "¥hello".byte_slice(0, 1)  # => "\xC2" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1, 1)  # => "\xA5" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1, 2)  # => "\xA5h" (invalid UTF-8 character)
   # "hello".byte_slice(6, 2)   # raises IndexError
   # "hello".byte_slice(-6, 2)  # raises IndexError
   # "hello".byte_slice(0, -2)  # raises ArgumentError
@@ -1184,6 +1184,35 @@ class String
     byte_slice?(start, count) || raise IndexError.new
   end
 
+  # Returns a new string built from byte in *range*.
+  #
+  # Byte indices can be negative to start counting from the end of the string.
+  # If the end index is bigger than `#bytesize`, only remaining bytes are returned.
+  #
+  # This method should be avoided,
+  # unless the string is proven to be ASCII-only (for example `#ascii_only?`),
+  # or the byte positions are known to be at character boundaries.
+  # Otherwise, multi-byte characters may be split, leading to an invalid UTF-8 encoding.
+  #
+  # Raises `IndexError` if the *range* begin is out of bounds.
+  #
+  # ```
+  # "hello".byte_slice(0..2)   # => "hel"
+  # "hello".byte_slice(0..100) # => "hello"
+  # "hello".byte_slice(-2..3)  # => "l"
+  # "hello".byte_slice(-2..5)  # => "lo"
+  # "¥hello".byte_slice(0...2) # => "¥"
+  # "¥hello".byte_slice(2...4) # => "he"
+  # "¥hello".byte_slice(0..0)  # => "\xC2" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1..1)  # => "\xA5" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1..2)  # => "\xA5h" (invalid UTF-8 character)
+  # "hello".byte_slice(6..2)   # raises IndexError
+  # "hello".byte_slice(-6..2)  # raises IndexError
+  # ```
+  def byte_slice(range : Range) : String
+    byte_slice(*Indexable.range_to_index_and_count(range, bytesize) || raise IndexError.new)
+  end
+
   # Like `byte_slice(Int, Int)` but returns `Nil` if the *start* index is out of bounds.
   #
   # Raises `ArgumentError` if *count* is negative.
@@ -1209,6 +1238,18 @@ class String
     end
   end
 
+  # Like `byte_slice(Range)` but returns `Nil` if *range* begin is out of bounds.
+  #
+  # ```
+  # "hello".byte_slice?(0..2)   # => "hel"
+  # "hello".byte_slice?(0..100) # => "hello"
+  # "hello".byte_slice?(6..8)   # => nil
+  # "hello".byte_slice?(-6..2)  # => nil
+  # ```
+  def byte_slice?(range : Range) : String?
+    byte_slice?(*Indexable.range_to_index_and_count(range, bytesize) || return nil)
+  end
+
   # Returns a substring starting from the *start* byte.
   #
   # *start* can be negative to start counting
@@ -1226,7 +1267,7 @@ class String
   # "hello".byte_slice(2)  # => "llo"
   # "hello".byte_slice(-2) # => "lo"
   # "¥hello".byte_slice(2) # => "hello"
-  # "¥hello".byte_slice(1) # => "�hello" (invalid UTF-8 character)
+  # "¥hello".byte_slice(1) # => "\xA5hello" (invalid UTF-8 character)
   # "hello".byte_slice(6)  # raises IndexError
   # "hello".byte_slice(-6) # raises IndexError
   # ```
@@ -1236,6 +1277,33 @@ class String
     byte_slice start, count
   end
 
+  # Returns a substring starting from the *start* byte.
+  #
+  # *start* can be negative to start counting
+  # from the end of the string.
+  #
+  # This method should be avoided,
+  # unless the string is proven to be ASCII-only (for example `#ascii_only?`),
+  # or the byte positions are known to be at character boundaries.
+  # Otherwise, multi-byte characters may be split, leading to an invalid UTF-8 encoding.
+  #
+  # Returns `nil` if *start* index is out of bounds.
+  #
+  # ```
+  # "hello".byte_slice?(0)  # => "hello"
+  # "hello".byte_slice?(2)  # => "llo"
+  # "hello".byte_slice?(-2) # => "lo"
+  # "¥hello".byte_slice?(2) # => "hello"
+  # "¥hello".byte_slice?(1) # => "\xA5hello" (invalid UTF-8 character)
+  # "hello".byte_slice?(6)  # => nil
+  # "hello".byte_slice?(-6) # => nil
+  # ```
+  def byte_slice?(start : Int) : String?
+    count = bytesize - start
+    return nil if start > 0 && count < 0
+    byte_slice? start, count
+  end
+
   # Returns the codepoint of the character at the given *index*.
   #
   # Negative indices can be used to start counting from the end of the string.