Skip to content

Commit

Permalink
Add support for regexp passed to String#index
Browse files Browse the repository at this point in the history
There are some edge cases I skipped for now. I thought I would use this
for String#each_line but ended up going a different direction. But this
is still progress.
  • Loading branch information
seven1m committed Jun 16, 2024
1 parent e72dc28 commit c1f443a
Show file tree
Hide file tree
Showing 4 changed files with 390 additions and 8 deletions.
367 changes: 367 additions & 0 deletions spec/core/string/index_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,367 @@
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'
require_relative 'fixtures/classes'

# Argument checking and implicit conversion for String#index,
# independent of whether the pattern is a String or a Regexp.
describe "String#index" do
  # The next three examples pass arguments that must be rejected with TypeError.
  it "raises a TypeError if passed nil" do
    -> { "abc".index(nil) }.should raise_error(TypeError)
  end

  it "raises a TypeError if passed a boolean" do
    -> { "abc".index(true) }.should raise_error(TypeError)
  end

  it "raises a TypeError if passed a Symbol" do
    -> { "abc".index(:a) }.should raise_error(TypeError)
  end

  # A pattern that is not a String is converted through #to_str.
  it "calls #to_str to convert the first argument" do
    needle = mock("string index char")
    needle.should_receive(:to_str).and_return("b")
    "abc".index(needle).should == 1
  end

  # A start offset that is not an Integer is converted through #to_int.
  it "calls #to_int to convert the second argument" do
    start = mock("string index offset")
    start.should_receive(:to_int).and_return(1)
    "abc".index("c", start).should == 2
  end

  # 97 is the codepoint of "a"; presumably this guards against treating an
  # Integer as a character code -- confirm against the String#index docs.
  it "raises a TypeError if passed an Integer" do
    -> { "abc".index(97) }.should raise_error(TypeError)
  end
end

# Behaviour of String#index when the pattern is a String (or converts to one).
# NATFIXME blocks appear to wrap assertions that are currently expected to fail
# with the named exception -- confirm against the helper's definition.
describe "String#index with String" do
  it "behaves the same as String#index(char) for one-character strings" do
    singles = "blablabla hello cruel world...!".split("").uniq

    singles.each do |single|
      first_char = single[0]
      single.index(single).should == single.index(first_char)

      # Non-negative offsets first (running past the end), then negative ones.
      non_negative = (0..(single.size + 1))
      negative = ((-single.size - 1)..-1)

      [non_negative, negative].each do |offsets|
        offsets.each do |start|
          single.index(single, start).should == single.index(first_char, start)
        end
      end
    end
  end

  it "returns the index of the first occurrence of the given substring" do
    # expected position => needles that must be found at that position
    needles_by_position = {
      0 => ["", "b", "bla", "blabla", "blablabla"],
      1 => ["l", "la", "labla", "lablabla"],
      2 => ["a", "abla", "ablabla"]
    }

    needles_by_position.each do |position, needles|
      needles.each do |needle|
        "blablabla".index(needle).should == position
      end
    end
  end

  it "doesn't set $~" do
    # Reset the last-match global so a stale value cannot mask a failure.
    $~ = nil

    'hello.'.index('ll')
    $~.should == nil
  end

  it "ignores string subclasses" do
    sub_needle = StringSpecs::MyString.new("bla")
    "blablabla".index(sub_needle).should == 0

    sub_haystack = StringSpecs::MyString.new("blablabla")
    sub_haystack.index("bla").should == 0

    both_haystack = StringSpecs::MyString.new("blablabla")
    both_needle = StringSpecs::MyString.new("bla")
    both_haystack.index(both_needle).should == 0
  end

  it "starts the search at the given offset" do
    haystack = "blablabla"

    # start offset => expected index, grouped by the needle's first letter
    from_b = { 0 => 0, 1 => 3, 2 => 3, 3 => 3 }
    from_l = { 1 => 1, 2 => 4, 3 => 4, 4 => 4 }
    from_a = { 2 => 2, 3 => 5, 4 => 5, 5 => 5 }

    table = [
      ["bl", from_b], ["bla", from_b], ["blab", from_b],
      ["la", from_l], ["lab", from_l],
      ["ab", from_a]
    ]

    table.each do |needle, expectations|
      expectations.each do |offset, position|
        haystack.index(needle, offset).should == position
      end
    end

    haystack.index("", 0).should == 0

    # An empty needle is expected at the offset itself for offsets 1..9.
    NATFIXME 'not sure what is going on here', exception: SpecFailedException do
      [1, 2, 7, 8, 9].each do |offset|
        haystack.index("", offset).should == offset
      end
    end
  end

  it "starts the search at offset + self.length if offset is negative" do
    haystack = "blablabla"
    len = haystack.length
    needles = %w[bl bla blab la lab ab] + [""]

    needles.each do |needle|
      (-len).upto(-1) do |negative|
        haystack.index(needle, negative).should == haystack.index(needle, negative + len)
      end
    end
  end

  it "returns nil if the substring isn't found" do
    %w[B z BLA blablablabla].each do |needle|
      "blablabla".index(needle).should == nil
    end

    # Offset 10 is past the end of the 9-character string.
    NATFIXME 'return nil', exception: SpecFailedException do
      "blablabla".index("", 10).should == nil
    end

    [1, 2].each do |offset|
      "hello".index("he", offset).should == nil
    end

    "I’ve got a multibyte character.\n".index("\n\n").should == nil
  end

  it "returns the character index of a multibyte character" do
    found_at = "ありがとう".index("が")
    found_at.should == 2
  end

  it "returns the character index after offset" do
    "われわれ".index("わ", 1).should == 2
    "ありがとうありがとう".index("が", 3).should == 7
  end

  it "returns the character index after a partial first match" do
    # "</" at index 0 is only a prefix of the needle; the full match starts at 2.
    "</</h".index("</h").should == 2
  end

  it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
    euc_jp_needle = "れ".encode(Encoding::EUC_JP)

    NATFIXME 'check encoding compatibility', exception: SpecFailedException, message: /Encoding::CompatibilityError/ do
      -> { "あれ".index(euc_jp_needle) }.should raise_error(Encoding::CompatibilityError)
    end
  end

  it "handles a substring in a superset encoding" do
    ascii_haystack = 'abc'.dup.force_encoding(Encoding::US_ASCII)
    ascii_haystack.index('é').should == nil
  end

  it "handles a substring in a subset encoding" do
    ascii_needle = 't'.dup.force_encoding(Encoding::US_ASCII)
    'été'.index(ascii_needle).should == 1
  end

  it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
    # The expected ArgumentError names ISO-2022-JP, the encoding forced on the haystack below.
    NATFIXME 'Implement ISO-2022-JP', exception: ArgumentError, message: 'unknown encoding name - "ISO-2022-JP"' do
      haystack = 'abc'.dup.force_encoding("ISO-2022-JP")
      needle = 'b'.dup.force_encoding("EUC-JP")

      -> { haystack.index(needle) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP")
    end
  end
end

# Behaviour of String#index when the pattern is a Regexp.
# NATFIXME blocks appear to wrap assertions that are currently expected to fail
# with the named exception -- confirm against the helper's definition.
describe "String#index with Regexp" do
  it "behaves the same as String#index(string) for escaped string regexps" do
    NATFIXME 'handle escaped regex better', exception: SpecFailedException do
      ["blablabla", "hello cruel world...!"].each do |str|
        ["", "b", "bla", "lab", "o c", "d."].each do |needle|
          # Escaping makes the regexp match the needle literally.
          regexp = Regexp.new(Regexp.escape(needle))
          str.index(regexp).should == str.index(needle)

          0.upto(str.size + 1) do |start|
            str.index(regexp, start).should == str.index(needle, start)
          end

          (-str.size - 1).upto(-1) do |start|
            str.index(regexp, start).should == str.index(needle, start)
          end
        end
      end
    end
  end

  it "returns the index of the first match of regexp" do
    "blablabla".index(/bla/).should == 0
    "blablabla".index(/BLA/i).should == 0

    "blablabla".index(/.{0}/).should == 0
    "blablabla".index(/.{6}/).should == 0
    "blablabla".index(/.{9}/).should == 0

    "blablabla".index(/.*/).should == 0
    "blablabla".index(/.+/).should == 0

    "blablabla".index(/lab|b/).should == 0

    # String-boundary anchors; the guard presumably skips these on Opal -- confirm.
    not_supported_on :opal do
      "blablabla".index(/\A/).should == 0
      NATFIXME 'fix matching anchors', exception: SpecFailedException do
        "blablabla".index(/\Z/).should == 9
        "blablabla".index(/\z/).should == 9
        "blablabla\n".index(/\Z/).should == 9
        "blablabla\n".index(/\z/).should == 10
      end
    end

    # Line anchors.
    "blablabla".index(/^/).should == 0
    "\nblablabla".index(/^/).should == 0
    "b\nablabla".index(/$/).should == 1
    "bl\nablabla".index(/$/).should == 2

    "blablabla".index(/.l./).should == 0
  end

  it "sets $~ to MatchData of match and nil when there's none" do
    NATFIXME 'Set $~', exception: NoMethodError, message: "undefined method `[]' for nil" do
      'hello.'.index(/.(.)/)
      $~[0].should == 'he'

      'hello.'.index(/not/)
      $~.should == nil
    end
  end

  # Guarded by a ruby_bug reference; presumably only enforced outside the
  # listed version range -- confirm against the MSpec guard documentation.
  ruby_bug "#20421", ""..."3.3" do
    it "always clear $~" do
      "a".index(/a/)
      $~.should_not == nil

      # The start offset is past the end of the string.
      string = "blablabla"
      string.index(/bla/, string.length + 1)
      $~.should == nil
    end
  end

  it "starts the search at the given offset" do
    "blablabla".index(/.{0}/, 5).should == 5
    "blablabla".index(/.{1}/, 5).should == 5
    "blablabla".index(/.{2}/, 5).should == 5
    "blablabla".index(/.{3}/, 5).should == 5
    "blablabla".index(/.{4}/, 5).should == 5

    "blablabla".index(/.{0}/, 3).should == 3
    "blablabla".index(/.{1}/, 3).should == 3
    "blablabla".index(/.{2}/, 3).should == 3
    "blablabla".index(/.{5}/, 3).should == 3
    "blablabla".index(/.{6}/, 3).should == 3

    "blablabla".index(/.l./, 0).should == 0
    "blablabla".index(/.l./, 1).should == 3
    "blablabla".index(/.l./, 2).should == 3
    "blablabla".index(/.l./, 3).should == 3

    "xblaxbla".index(/x./, 0).should == 0
    "xblaxbla".index(/x./, 1).should == 4
    "xblaxbla".index(/x./, 2).should == 4

    not_supported_on :opal do
      "blablabla\n".index(/\Z/, 9).should == 9
    end
  end

  it "starts the search at offset + self.length if offset is negative" do
    str = "blablabla"

    # Regexp patterns are used here so that this example exercises the Regexp
    # code path; with String needles it only repeated the String example.
    # NOTE(review): not run against the implementation under test -- if it
    # fails there, wrap the assertions in NATFIXME rather than reverting.
    [/bl/, /bla/, /blab/, /la/, /lab/, /ab/, //].each do |regexp|
      (-str.length .. -1).each do |offset|
        str.index(regexp, offset).should ==
          str.index(regexp, offset + str.length)
      end
    end
  end

  it "returns nil if the substring isn't found" do
    "blablabla".index(/BLA/).should == nil

    "blablabla".index(/.{10}/).should == nil
    "blaxbla".index(/.x/, 3).should == nil
    "blaxbla".index(/..x/, 2).should == nil
  end

  it "returns nil if the Regexp matches the empty string and the offset is out of range" do
    "ruby".index(//,12).should be_nil
  end

  it "supports \\G which matches at the given start offset" do
    "helloYOU.".index(/\GYOU/, 5).should == 5
    "helloYOU.".index(/\GYOU/).should == nil

    re = /\G.+YOU/
    # The # marks where \G will match.
    [
      ["#hi!YOUall.", 0],
      ["h#i!YOUall.", 1],
      ["hi#!YOUall.", 2],
      ["hi!#YOUall.", nil]
    ].each do |spec|

      # The position of the marker is the start offset; the marker itself is
      # removed before searching.
      start = spec[0].index("#")
      str = spec[0].delete("#")

      str.index(re, start).should == spec[1]
    end
  end

  it "converts start_offset to an integer via to_int" do
    obj = mock('1')
    obj.should_receive(:to_int).and_return(1)
    "RWOARW".index(/R./, obj).should == 4
  end

  it "returns the character index of a multibyte character" do
    "ありがとう".index(/が/).should == 2
  end

  it "returns the character index after offset" do
    "われわれ".index(/わ/, 1).should == 2
  end

  it "treats the offset as a character index" do
    "われわわれ".index(/わ/, 3).should == 3
  end

  ruby_bug "#19763", ""..."3.3.0" do
    it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
      re = Regexp.new "れ".encode(Encoding::EUC_JP)
      -> do
        "あれ".index re
      end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)")
    end
  end

  # The exception message was incorrectly "incompatible character encodings: UTF-8 and EUC-JP" before 3.3.0
  # Still test that the right exception class is used before that.
  it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
    re = Regexp.new "れ".encode(Encoding::EUC_JP)
    NATFIXME 'check encoding compatibility', exception: SpecFailedException, message: /Encoding::CompatibilityError/ do
      -> do
        "あれ".index re
      end.should raise_error(Encoding::CompatibilityError)
    end
  end
end
10 changes: 4 additions & 6 deletions spec/core/string/match_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@

describe "String#=~" do
it "behaves the same way as index() when given a regexp" do
NATFIXME 'Implement Regexp argument in String#index', exception: TypeError, message: 'no implicit conversion of Regexp into String' do
("rudder" =~ /udder/).should == "rudder".index(/udder/)
("boat" =~ /[^fl]oat/).should == "boat".index(/[^fl]oat/)
("bean" =~ /bag/).should == "bean".index(/bag/)
("true" =~ /false/).should == "true".index(/false/)
end
("rudder" =~ /udder/).should == "rudder".index(/udder/)
("boat" =~ /[^fl]oat/).should == "boat".index(/[^fl]oat/)
("bean" =~ /bag/).should == "bean".index(/bag/)
("true" =~ /false/).should == "true".index(/false/)
end

it "raises a TypeError if a obj is a string" do
Expand Down
Loading

0 comments on commit c1f443a

Please sign in to comment.