From 753726de9ed3a9eb58f3c69b9770c0adb90852b3 Mon Sep 17 00:00:00 2001 From: Tim Morgan Date: Fri, 28 Jun 2024 10:08:55 -0500 Subject: [PATCH 1/4] Remove unused String::insert method This one is pretty unsafe since it has no bounds checking and it's not used anywhere. --- include/natalie/string_object.hpp | 1 - src/string_object.cpp | 4 ---- 2 files changed, 5 deletions(-) diff --git a/include/natalie/string_object.hpp b/include/natalie/string_object.hpp index 4edb677eb..c389fd85a 100644 --- a/include/natalie/string_object.hpp +++ b/include/natalie/string_object.hpp @@ -173,7 +173,6 @@ class StringObject : public Object { EncodingObject *assert_compatible_string_and_update_encoding(Env *, StringObject *); void prepend_char(Env *, char); - void insert(Env *, size_t, char); void append_char(char); void append(signed char); diff --git a/src/string_object.cpp b/src/string_object.cpp index cf7d031c3..ab9940528 100644 --- a/src/string_object.cpp +++ b/src/string_object.cpp @@ -3309,10 +3309,6 @@ void StringObject::prepend_char(Env *env, char c) { m_string.prepend_char(c); } -void StringObject::insert(Env *, size_t position, char c) { - m_string.insert(position, c); -} - void StringObject::append_char(char c) { m_string.append_char(c); } From 2dc0711cc8bb0c8b0c6f973f740b5551ac7bb7ce Mon Sep 17 00:00:00 2001 From: Tim Morgan Date: Fri, 28 Jun 2024 11:21:54 -0500 Subject: [PATCH 2/4] Update TM lib This adds support for String::append(ssize_t). --- ext/tm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/tm b/ext/tm index 751b55645..31445c39f 160000 --- a/ext/tm +++ b/ext/tm @@ -1 +1 @@ -Subproject commit 751b556450f0b850be680cd7c47094d704bc1253 +Subproject commit 31445c39f391cd9cfec018db3c537bb245088199 From 8c1e54e5612dcaa593cb06bc9a584cfeb91a8b0e Mon Sep 17 00:00:00 2001 From: Tim Morgan Date: Fri, 28 Jun 2024 11:23:37 -0500 Subject: [PATCH 3/4] Allow converting negative char index for insert too ...and check we don't go out of bounds. --- include/natalie/string_object.hpp | 2 +- src/string_object.cpp | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/natalie/string_object.hpp b/include/natalie/string_object.hpp index c389fd85a..1f6787eef 100644 --- a/include/natalie/string_object.hpp +++ b/include/natalie/string_object.hpp @@ -399,7 +399,7 @@ class StringObject : public Object { static size_t byte_index_to_char_index(ArrayObject *chars, size_t byte_index); - ssize_t char_index_to_byte_index(ssize_t) const; + ssize_t char_index_to_byte_index(ssize_t, bool = false) const; ssize_t byte_index_to_char_index(size_t) const; static CaseMapType check_case_options(Env *env, Value arg1, Value arg2, bool downcase = false); diff --git a/src/string_object.cpp b/src/string_object.cpp index ab9940528..b1fd9f385 100644 --- a/src/string_object.cpp +++ b/src/string_object.cpp @@ -1954,15 +1954,19 @@ size_t StringObject::byte_index_to_char_index(ArrayObject *chars, size_t byte_in return char_index; } -ssize_t StringObject::char_index_to_byte_index(ssize_t char_index) const { +// For String#insert, -1 means *after* the last character. +// For any method looking at the characters, -1 means to include the last character. +ssize_t StringObject::char_index_to_byte_index(ssize_t char_index, bool for_insert) const { if (char_index < 0) { size_t byte_index = bytesize(); - ssize_t current_char_index = 0; + ssize_t current_char_index = for_insert ? -1 : 0; TM::StringView view; - do { + while (char_index < current_char_index) { + if (byte_index == 0) + return -1; view = prev_char(&byte_index); current_char_index--; - } while (byte_index != 0 && char_index < current_char_index); + } return byte_index; } From d9867913fc2cfaa8af1947be9b67fcbe139284bd Mon Sep 17 00:00:00 2001 From: Tim Morgan Date: Fri, 28 Jun 2024 11:23:59 -0500 Subject: [PATCH 4/4] Refactor String#insert and make spec-compliant --- spec/core/string/insert_spec.rb | 12 +++++----- src/string_object.cpp | 39 ++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/spec/core/string/insert_spec.rb b/spec/core/string/insert_spec.rb index ac98f02d2..483f3c936 100644 --- a/spec/core/string/insert_spec.rb +++ b/spec/core/string/insert_spec.rb @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- - +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -57,7 +57,7 @@ "ありがとう".insert(1, 'ü').should == "あüりがとう" end - xit "returns a String in the compatible encoding" do + it "returns a String in the compatible encoding" do str = "".force_encoding(Encoding::US_ASCII) str.insert(0, "ありがとう") str.encoding.should == Encoding::UTF_8 @@ -65,11 +65,9 @@ it "raises an Encoding::CompatibilityError if the encodings are incompatible" do pat = "ア".encode Encoding::EUC_JP - NATFIXME 'Raise Encoding::CompatibilityError', exception: SpecFailedException do - -> do - "あれ".insert 0, pat - end.should raise_error(Encoding::CompatibilityError) - end + -> do + "あれ".insert 0, pat + end.should raise_error(Encoding::CompatibilityError) end it "should not call subclassed string methods" do diff --git a/src/string_object.cpp b/src/string_object.cpp index b1fd9f385..da89745ff 100644 --- a/src/string_object.cpp +++ b/src/string_object.cpp @@ -2652,27 +2652,36 @@ bool StringObject::include(const char *arg) const { Value StringObject::insert(Env *env, Value index_obj, Value other_str) { assert_not_frozen(env); - nat_int_t index_i = index_obj->to_int(env)->to_nat_int_t(); + + auto char_index = IntegerObject::convert_to_native_type(env, index_obj->to_int(env)); StringObject *string = other_str->to_str(env); - size_t count = char_count(env); - size_t index = index_i < 0 ? (count + index_i + 1) : index_i; - if (index_i == -1) { + + if (char_index == -1) { + assert_compatible_string_and_update_encoding(env, string); append(string); return this; } - if (index > count) { - env->raise("IndexError", "index {} out of string", index_obj); - } - if (string->is_empty()) { + + auto byte_index = char_index_to_byte_index(char_index, true); + if (byte_index < 0) + env->raise("IndexError", "index {} out of string", char_index); + + if (string->is_empty()) + return this; + + if (m_encoding != string->m_encoding) + assert_compatible_string_and_update_encoding(env, string); + + if ((size_t)byte_index == m_string.length()) { + append(string); return this; } - Value slice = slice_in_place(env, Value::integer(index), Value::integer(count - index)); - append(string); - if (slice->is_string()) { - append(slice->as_string()); - } - // NATFIXME: Return string with compatible encoding - // NATFIXME: Raise Encoding::CompatibilityError if the encodings are incompatible + + String slice = m_string.substring(byte_index); + m_string.truncate(byte_index); + m_string.append(string->m_string); + m_string.append(slice); + return this; }