Skip to content

Commit

Permalink
Honor source encoding argument in String#encode
Browse files Browse the repository at this point in the history
  • Loading branch information
seven1m committed Jun 26, 2024
1 parent 407f51b commit 359197e
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 51 deletions.
1 change: 1 addition & 0 deletions include/natalie/object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ class Object : public Cell {

ArrayObject *as_array_or_raise(Env *);
ClassObject *as_class_or_raise(Env *);
EncodingObject *as_encoding_or_raise(Env *);
ExceptionObject *as_exception_or_raise(Env *);
FloatObject *as_float_or_raise(Env *);
HashObject *as_hash_or_raise(Env *);
Expand Down
4 changes: 2 additions & 2 deletions spec/core/string/encode_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@

describe "when passed to, from" do
it "returns a copy in the destination encoding when both encodings are the same" do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /from ASCII-8BIT to UTF-8/ do
NATFIXME 'same encoding for source and destination', exception: SpecFailedException, message: '#<Encoding:ASCII-8BIT> should be == to #<Encoding:UTF-8>' do
str = "あ".dup.force_encoding("binary")
encoded = str.encode("utf-8", "utf-8")

Expand Down Expand Up @@ -172,7 +172,7 @@
end

it "returns a copy in the destination encoding when both encodings are the same" do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /from ASCII-8BIT to UTF-8/ do
NATFIXME 'same encoding for source and destination', exception: SpecFailedException, message: '#<Encoding:ASCII-8BIT> should be == to #<Encoding:UTF-8>' do
str = "あ".dup.force_encoding("binary")
encoded = str.encode("utf-8", "utf-8", invalid: :replace)

Expand Down
78 changes: 32 additions & 46 deletions spec/core/string/shared/encode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,25 +127,21 @@

describe "when passed to, from" do
it "transcodes between the encodings ignoring the String encoding" do
NATFIXME 'honor source encoding', exception: SpecFailedException, message: /should be ==/ do
str = "あ"
result = [0xA6, 0xD0, 0x8F, 0xAB, 0xE4, 0x8F, 0xAB, 0xB1].pack('C8')
result.force_encoding Encoding::EUC_JP
str.send(@method, "euc-jp", "ibm437").should == result
end
str = "あ"
result = [0xA6, 0xD0, 0x8F, 0xAB, 0xE4, 0x8F, 0xAB, 0xB1].pack('C8')
result.force_encoding Encoding::EUC_JP
str.send(@method, "euc-jp", "ibm437").should == result
end

it "calls #to_str to convert the from object to an Encoding" do
NATFIXME 'honor source encoding', exception: SpecFailedException, message: /should be ==/ do
enc = mock("string encode encoding")
enc.should_receive(:to_str).and_return("ibm437")
enc = mock("string encode encoding")
enc.should_receive(:to_str).and_return("ibm437")

str = "あ"
result = [0xA6, 0xD0, 0x8F, 0xAB, 0xE4, 0x8F, 0xAB, 0xB1].pack('C8')
result.force_encoding Encoding::EUC_JP
str = "あ"
result = [0xA6, 0xD0, 0x8F, 0xAB, 0xE4, 0x8F, 0xAB, 0xB1].pack('C8')
result.force_encoding Encoding::EUC_JP

str.send(@method, "euc-jp", enc).should == result
end
str.send(@method, "euc-jp", enc).should == result
end
end

Expand Down Expand Up @@ -174,53 +170,43 @@

describe "when passed to, from, options" do
it "replaces undefined characters in the destination encoding" do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /to UTF-8 in conversion from ASCII-8BIT to UTF-8 to EUC-JP/ do
str = "あ?あ".force_encoding Encoding::BINARY
result = str.send(@method, "euc-jp", "utf-8", undef: :replace)
xA4xA2 = [0xA4, 0xA2].pack('CC').force_encoding('utf-8')
result.should == "#{xA4xA2}?#{xA4xA2}".force_encoding("euc-jp")
end
str = "あ?あ".force_encoding Encoding::BINARY
result = str.send(@method, "euc-jp", "utf-8", undef: :replace)
xA4xA2 = [0xA4, 0xA2].pack('CC').force_encoding('utf-8')
result.should == "#{xA4xA2}?#{xA4xA2}".force_encoding("euc-jp")
end

it "replaces invalid characters in the destination encoding" do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /to UTF-8 in conversion from ASCII-8BIT to UTF-8 to ISO-8859-1/ do
xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, "iso-8859-1", "utf-8", invalid: :replace).should == "ab?c"
end
xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, "iso-8859-1", "utf-8", invalid: :replace).should == "ab?c"
end

it "calls #to_str to convert the to object to an encoding" do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /to UTF-8 in conversion from ASCII-8BIT to UTF-8 to ISO-8859-1/ do
to = mock("string encode to encoding")
to.should_receive(:to_str).and_return("iso-8859-1")
to = mock("string encode to encoding")
to.should_receive(:to_str).and_return("iso-8859-1")

xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, to, "utf-8", invalid: :replace).should == "ab?c"
end
xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, to, "utf-8", invalid: :replace).should == "ab?c"
end

it "calls #to_str to convert the from object to an encoding" do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /to UTF-8 in conversion from ASCII-8BIT to UTF-8 to ISO-8859-1/ do
from = mock("string encode to encoding")
from.should_receive(:to_str).and_return("utf-8")
from = mock("string encode to encoding")
from.should_receive(:to_str).and_return("utf-8")

xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, "iso-8859-1", from, invalid: :replace).should == "ab?c"
end
xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, "iso-8859-1", from, invalid: :replace).should == "ab?c"
end

it "calls #to_hash to convert the options object" do
NATFIXME 'keyword splat should call to_hash?' do
options = mock("string encode options")
options.should_receive(:to_hash).and_return({ invalid: :replace })
options = mock("string encode options")
options.should_receive(:to_hash).and_return({ invalid: :replace })

xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, "iso-8859-1", "utf-8", **options).should == "ab?c"
end
xFF = [0xFF].pack('C').force_encoding('utf-8')
str = "ab#{xFF}c".force_encoding Encoding::BINARY
str.send(@method, "iso-8859-1", "utf-8", **options).should == "ab?c"
end
end

Expand Down
6 changes: 6 additions & 0 deletions src/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,12 @@ ClassObject *Object::as_class_or_raise(Env *env) {
return static_cast<ClassObject *>(this);
}

// Checked downcast: returns this object as an EncodingObject pointer.
// If is_encoding() is false, a TypeError is reported through env->raise, with
// the inspect string of this object's class (m_klass) filled into the message.
EncodingObject *Object::as_encoding_or_raise(Env *env) {
if (!is_encoding())
env->raise("TypeError", "{} can't be coerced into Encoding", m_klass->inspect_str());
// NOTE(review): presumably env->raise does not return, so the cast below is only reached for encodings — confirm.
return static_cast<EncodingObject *>(this);
}

ExceptionObject *Object::as_exception_or_raise(Env *env) {
if (!is_exception())
env->raise("TypeError", "{} can't be coerced into Exception", m_klass->inspect_str());
Expand Down
9 changes: 6 additions & 3 deletions src/string_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,9 @@ Value StringObject::encode_in_place(Env *env, Value dst_encoding, Value src_enco
if (!dst_encoding)
dst_encoding = EncodingObject::get(Encoding::UTF_8);

if (!src_encoding)
src_encoding = m_encoding;

EncodeOptions options;
if (kwargs) {
if (kwargs->remove(env, "universal_newline"_s))
Expand Down Expand Up @@ -1172,9 +1175,9 @@ Value StringObject::encode_in_place(Env *env, Value dst_encoding, Value src_enco
}

env->ensure_no_extra_keywords(kwargs);
auto orig_encoding = m_encoding;
EncodingObject *encoding_obj = EncodingObject::find_encoding(env, dst_encoding);
return encoding_obj->encode(env, orig_encoding, this, options);
EncodingObject *dst_encoding_obj = EncodingObject::find_encoding(env, dst_encoding);
EncodingObject *src_encoding_obj = EncodingObject::find_encoding(env, src_encoding);
return dst_encoding_obj->encode(env, src_encoding_obj, this, options);
}

Value StringObject::force_encoding(Env *env, Value encoding) {
Expand Down

0 comments on commit 359197e

Please sign in to comment.