From 478e3e1a85526edd8c6f01dd04b9032211814221 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 12 Nov 2024 11:35:12 +0100 Subject: [PATCH] Mark strings returned by `Symbol#to_s` as chilled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Feature #20350] `STR_CHILLED` now spans on two user flags. If one bit is set it marks a chilled string literal, if it's the other it marks a `Symbol#to_s` chilled string. Since it's not possible, and doesn't make much sense to include debug info when `--debug-frozen-string-literal` is set, we can't include allocation source, but we can safely include the symbol name in the warning message, making it much easier to find the source of the issue. Co-Authored-By: Étienne Barrié --- compile.c | 2 +- error.c | 11 +- include/ruby/internal/intern/error.h | 2 +- internal/string.h | 29 ++-- object.c | 8 +- prism_compile.c | 2 +- spec/ruby/core/string/chilled_string_spec.rb | 166 +++++++++++++------ string.c | 16 +- variable.c | 2 +- 9 files changed, 167 insertions(+), 71 deletions(-) diff --git a/compile.c b/compile.c index e777fba89ababa..10d02b24476786 100644 --- a/compile.c +++ b/compile.c @@ -8950,7 +8950,7 @@ compile_builtin_attr(rb_iseq_t *iseq, const NODE *node) if (!SYMBOL_P(symbol)) goto non_symbol_arg; - string = rb_sym_to_s(symbol); + string = rb_sym2str(symbol); if (strcmp(RSTRING_PTR(string), "leaf") == 0) { ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF; } diff --git a/error.c b/error.c index 3fc1d96d6c52f8..384a56f5df01bc 100644 --- a/error.c +++ b/error.c @@ -4016,7 +4016,7 @@ rb_error_frozen_object(VALUE frozen_obj) } void -rb_warn_unchilled(VALUE obj) +rb_warn_unchilled_literal(VALUE obj) { rb_warning_category_t category = RB_WARN_CATEGORY_DEPRECATED; if (!NIL_P(ruby_verbose) && rb_warning_category_enabled_p(category)) { @@ -4047,6 +4047,15 @@ rb_warn_unchilled(VALUE obj) } } +void +rb_warn_unchilled_symbol_to_s(VALUE obj) +{ + rb_category_warn( + RB_WARN_CATEGORY_DEPRECATED, + "warning: string returned by :%s.to_s will be frozen in the future", RSTRING_PTR(obj) + ); +} + #undef rb_check_frozen void rb_check_frozen(VALUE obj) diff --git a/include/ruby/internal/intern/error.h b/include/ruby/internal/intern/error.h index b1e2c130b94ff9..1fd9ec2f516eb3 100644 --- a/include/ruby/internal/intern/error.h +++ b/include/ruby/internal/intern/error.h @@ -258,7 +258,7 @@ rb_check_frozen_inline(VALUE obj) /* ref: internal CHILLED_STRING_P() This is an implementation detail subject to change. */ - if (RB_UNLIKELY(RB_TYPE_P(obj, T_STRING) && FL_TEST_RAW(obj, RUBY_FL_USER3))) { + if (RB_UNLIKELY(RB_TYPE_P(obj, T_STRING) && FL_TEST_RAW(obj, RUBY_FL_USER2 | RUBY_FL_USER3))) { // STR_CHILLED rb_str_modify(obj); } } diff --git a/internal/string.h b/internal/string.h index 080acb3c3c5c90..efeb0827c98fc3 100644 --- a/internal/string.h +++ b/internal/string.h @@ -15,9 +15,11 @@ #include "ruby/encoding.h" /* for rb_encoding */ #include "ruby/ruby.h" /* for VALUE */ -#define STR_SHARED FL_USER0 /* = ELTS_SHARED */ -#define STR_NOEMBED FL_USER1 -#define STR_CHILLED FL_USER3 +#define STR_SHARED FL_USER0 /* = ELTS_SHARED */ +#define STR_NOEMBED FL_USER1 +#define STR_CHILLED (FL_USER2 | FL_USER3) +#define STR_CHILLED_LITERAL FL_USER2 +#define STR_CHILLED_SYMBOL_TO_S FL_USER3 enum ruby_rstring_private_flags { RSTRING_CHILLED = STR_CHILLED, @@ -60,7 +62,8 @@ VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE); VALUE rb_str_with_debug_created_info(VALUE, VALUE, int); /* error.c */ -void rb_warn_unchilled(VALUE str); +void rb_warn_unchilled_literal(VALUE str); +void rb_warn_unchilled_symbol_to_s(VALUE str); static inline bool STR_EMBED_P(VALUE str); static inline bool STR_SHARED_P(VALUE str); @@ -127,14 +130,18 @@ CHILLED_STRING_P(VALUE obj) static inline void CHILLED_STRING_MUTATED(VALUE str) { + VALUE chilled_reason = RB_FL_TEST_RAW(str, STR_CHILLED); FL_UNSET_RAW(str, STR_CHILLED); - rb_warn_unchilled(str); -} - -static inline void -STR_CHILL_RAW(VALUE str) -{ - FL_SET_RAW(str, STR_CHILLED); + switch (chilled_reason) { + case STR_CHILLED_SYMBOL_TO_S: + rb_warn_unchilled_symbol_to_s(str); + break; + case STR_CHILLED_LITERAL: + rb_warn_unchilled_literal(str); + break; + default: + rb_bug("RString was chilled for multiple reasons"); + } } static inline bool diff --git a/object.c b/object.c index 1bd476b0228a0f..ac9fa91867ed12 100644 --- a/object.c +++ b/object.c @@ -500,10 +500,12 @@ rb_obj_clone_setup(VALUE obj, VALUE clone, VALUE kwfreeze) case Qnil: rb_funcall(clone, id_init_clone, 1, obj); RBASIC(clone)->flags |= RBASIC(obj)->flags & FL_FREEZE; - if (CHILLED_STRING_P(obj)) { - STR_CHILL_RAW(clone); + + if (RB_TYPE_P(obj, T_STRING)) { + FL_SET_RAW(clone, FL_TEST_RAW(obj, STR_CHILLED)); } - else if (RB_OBJ_FROZEN(obj)) { + + if (RB_OBJ_FROZEN(obj)) { rb_shape_t * next_shape = rb_shape_transition_shape_frozen(clone); if (!rb_shape_obj_too_complex(clone) && next_shape->type == SHAPE_OBJ_TOO_COMPLEX) { rb_evict_ivars_to_hash(clone); diff --git a/prism_compile.c b/prism_compile.c index f4a35fa429297e..fe5fde8d6305f6 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -3382,7 +3382,7 @@ pm_compile_builtin_attr(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, cons } VALUE symbol = pm_static_literal_value(iseq, argument, scope_node); - VALUE string = rb_sym_to_s(symbol); + VALUE string = rb_sym2str(symbol); if (strcmp(RSTRING_PTR(string), "leaf") == 0) { ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF; diff --git a/spec/ruby/core/string/chilled_string_spec.rb b/spec/ruby/core/string/chilled_string_spec.rb index 9f81b1af6d6597..b8fb6eedc94653 100644 --- a/spec/ruby/core/string/chilled_string_spec.rb +++ b/spec/ruby/core/string/chilled_string_spec.rb @@ -2,69 +2,141 @@ describe "chilled String" do guard -> { ruby_version_is "3.4" and !"test".equal?("test") } do - describe "#frozen?" do - it "returns false" do - "chilled".frozen?.should == false + describe "chilled string literals" do + + describe "#frozen?" do + it "returns false" do + "chilled".frozen?.should == false + end end - end - describe "#-@" do - it "returns a different instance" do - input = "chilled" - interned = (-input) - interned.frozen?.should == true - interned.object_id.should_not == input.object_id + describe "#-@" do + it "returns a different instance" do + input = "chilled" + interned = (-input) + interned.frozen?.should == true + interned.object_id.should_not == input.object_id + end end - end - describe "#+@" do - it "returns a different instance" do - input = "chilled" - duped = (+input) - duped.frozen?.should == false - duped.object_id.should_not == input.object_id + describe "#+@" do + it "returns a different instance" do + input = "chilled" + duped = (+input) + duped.frozen?.should == false + duped.object_id.should_not == input.object_id + end end - end - describe "#clone" do - it "preserves chilled status" do - input = "chilled".clone - -> { - input << "-mutated" - }.should complain(/literal string will be frozen in the future/) - input.should == "chilled-mutated" + describe "#clone" do + it "preserves chilled status" do + input = "chilled".clone + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "chilled-mutated" + end + end + + describe "mutation" do + it "emits a warning" do + input = "chilled" + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "chilled-mutated" + end + + it "emits a warning on singleton_class creation" do + -> { + "chilled".singleton_class + }.should complain(/literal string will be frozen in the future/) + end + + it "emits a warning on instance variable assignment" do + -> { + "chilled".instance_variable_set(:@ivar, 42) + }.should complain(/literal string will be frozen in the future/) + end + + it "raises FrozenError after the string was explicitly frozen" do + input = "chilled" + input.freeze + -> { + -> { + input << "mutated" + }.should raise_error(FrozenError) + }.should_not complain(/literal string will be frozen in the future/) + end end end - describe "mutation" do - it "emits a warning" do - input = "chilled" - -> { - input << "-mutated" - }.should complain(/literal string will be frozen in the future/) - input.should == "chilled-mutated" + describe "chilled strings returned by Symbol#to_s" do + + describe "#frozen?" do + it "returns false" do + :chilled.to_s.frozen?.should == false + end end - it "emits a warning on singleton_class creation" do - -> { - "chilled".singleton_class - }.should complain(/literal string will be frozen in the future/) + describe "#-@" do + it "returns a different instance" do + input = :chilled.to_s + interned = (-input) + interned.frozen?.should == true + interned.object_id.should_not == input.object_id + end end - it "emits a warning on instance variable assignment" do - -> { - "chilled".instance_variable_set(:@ivar, 42) - }.should complain(/literal string will be frozen in the future/) + describe "#+@" do + it "returns a different instance" do + input = :chilled.to_s + duped = (+input) + duped.frozen?.should == false + duped.object_id.should_not == input.object_id + end + end + + describe "#clone" do + it "preserves chilled status" do + input = :chilled.to_s.clone + -> { + input << "-mutated" + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + input.should == "chilled-mutated" + end end - it "raises FrozenError after the string was explicitly frozen" do - input = "chilled" - input.freeze - -> { + describe "mutation" do + it "emits a warning" do + input = :chilled.to_s + -> { + input << "-mutated" + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + input.should == "chilled-mutated" + end + + it "emits a warning on singleton_class creation" do + -> { + :chilled.to_s.singleton_class + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + + it "emits a warning on instance variable assignment" do + -> { + :chilled.to_s.instance_variable_set(:@ivar, 42) + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + + it "raises FrozenError after the string was explicitly frozen" do + input = :chilled.to_s + input.freeze -> { - input << "mutated" - }.should raise_error(FrozenError) - }.should_not complain(/literal string will be frozen in the future/) + -> { + input << "mutated" + }.should raise_error(FrozenError) + }.should_not complain(/string returned by :chilled\.to_s will be frozen in the future/) + end end end end diff --git a/string.c b/string.c index 5d7bb509fa4a35..9f21a54354e151 100644 --- a/string.c +++ b/string.c @@ -87,8 +87,12 @@ VALUE rb_cSymbol; * The string is not embedded. When a string is embedded, the contents * follow the header. When a string is not embedded, the contents is * on a separately allocated buffer. - * 3: STR_CHILLED (will be frozen in a future version) - * The string appears frozen but can be mutated with a warning. + * 2: STR_CHILLED_LITERAL (will be frozen in a future version) + * The string was allocated as a literal in a file without an explicit `frozen_string_literal` comment. + * It emits a deprecation warning when mutated for the first time. + * 3: STR_CHILLED_SYMBOL_TO_S (will be frozen in a future version) + * The string was allocated by the `Symbol#to_s` method. + * It emits a deprecation warning when mutated for the first time. * 4: STR_PRECOMPUTED_HASH * The string is embedded and has its precomputed hascode stored * after the terminator. @@ -1958,7 +1962,7 @@ rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str, bool chil str_duplicate_setup_heap(klass, str, new_str); } if (chilled) { - STR_CHILL_RAW(new_str); + FL_SET_RAW(new_str, STR_CHILLED_LITERAL); } return new_str; } @@ -1969,7 +1973,7 @@ rb_str_with_debug_created_info(VALUE str, VALUE path, int line) VALUE debug_info = rb_ary_new_from_args(2, path, INT2FIX(line)); if (OBJ_FROZEN_RAW(str)) str = rb_str_dup(str); rb_ivar_set(str, id_debug_created_info, rb_ary_freeze(debug_info)); - STR_CHILL_RAW(str); + FL_SET_RAW(str, STR_CHILLED_LITERAL); return rb_str_freeze(str); } @@ -12148,7 +12152,9 @@ sym_inspect(VALUE sym) VALUE rb_sym_to_s(VALUE sym) { - return str_new_shared(rb_cString, rb_sym2str(sym)); + VALUE str = str_new_shared(rb_cString, rb_sym2str(sym)); + FL_SET_RAW(str, STR_CHILLED_SYMBOL_TO_S); + return str; } VALUE diff --git a/variable.c b/variable.c index 5e178ef06b1bb4..61c188bedee9a1 100644 --- a/variable.c +++ b/variable.c @@ -1814,7 +1814,7 @@ void rb_obj_freeze_inline(VALUE x) if (RB_FL_ABLE(x)) { RB_FL_SET_RAW(x, RUBY_FL_FREEZE); if (TYPE(x) == T_STRING) { - RB_FL_UNSET_RAW(x, FL_USER3); // STR_CHILLED + RB_FL_UNSET_RAW(x, FL_USER2 | FL_USER3); // STR_CHILLED } rb_shape_t * next_shape = rb_shape_transition_shape_frozen(x);