Skip to content

Commit

Permalink
Merge pull request #2134 from natalie-lang/string-encode-fallback-option
Browse files Browse the repository at this point in the history
  • Loading branch information
seven1m authored Jun 22, 2024
2 parents e82b60f + ee99369 commit a1dbb3b
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 113 deletions.
7 changes: 7 additions & 0 deletions include/natalie/encoding_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ class EncodingObject : public Object {
Replace,
};

// Value type of EncodeOptions::undef_option, which defaults to Raise.
// NOTE(review): presumably mirrors the `undef:` keyword of Ruby's String#encode
// (a character that has no representation in the destination encoding);
// the code that consumes this enum is not visible here — confirm.
enum class EncodeUndefOption {
// Default value of EncodeOptions::undef_option.
Raise,
// NOTE(review): presumably selected by `undef: :replace` — confirm.
Replace,
};

enum class EncodeNewlineOption {
None,
Cr,
Expand All @@ -93,8 +98,10 @@ class EncodingObject : public Object {

// Option bundle taken by value as the last parameter of EncodingObject::encode.
// Every field has a default, so a default-constructed EncodeOptions is valid.
// NOTE(review): the fields presumably correspond to the `invalid:`, `undef:`,
// newline, `replace:` and `fallback:` options of Ruby's String#encode; the
// parsing code is not visible here — confirm.
struct EncodeOptions {
// Defaults to Raise.
EncodeInvalidOption invalid_option = EncodeInvalidOption::Raise;
// Defaults to Raise.
EncodeUndefOption undef_option = EncodeUndefOption::Raise;
// Defaults to None.
EncodeNewlineOption newline_option = EncodeNewlineOption::None;
// nullptr by default.
// NOTE(review): presumably nullptr means "no replacement string was given" — confirm.
StringObject *replace_option = nullptr;
// nullptr by default.
// NOTE(review): presumably holds the `fallback:` object (the specs pass a proc
// or lambda) and nullptr means "no fallback given" — confirm.
Value fallback_option = nullptr;
};

virtual Value encode(Env *, EncodingObject *, StringObject *, EncodeOptions) const;
Expand Down
1 change: 1 addition & 0 deletions include/natalie/string_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ class StringObject : public Object {
// Aliases for the option types nested in EncodingObject, so that code in this
// class can name them without the `EncodingObject::` qualifier.
using EncodeOptions = EncodingObject::EncodeOptions;
using EncodeInvalidOption = EncodingObject::EncodeInvalidOption;
using EncodeNewlineOption = EncodingObject::EncodeNewlineOption;
using EncodeUndefOption = EncodingObject::EncodeUndefOption;

String m_string {};
EncodingObject *m_encoding { nullptr };
Expand Down
66 changes: 26 additions & 40 deletions spec/core/string/encode_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,63 +92,51 @@
end

it "replace multiple invalid bytes at the end with a single replacement character" do
NATFIXME 'encode options' do
NATFIXME 'same encoding with invalid chars', exception: SpecFailedException, message: /should be ==/ do
"\xE3\x81\x93\xE3\x81".encode("UTF-8", invalid: :replace).should == "\u3053\ufffd"
end
end

it "replaces invalid encoding in source using a specified replacement even when a fallback is given" do
NATFIXME 'encode options' do
encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: -> c { "bar" })
encoded.should == "\u3061foofoo".encode("UTF-16LE")
encoded.encode("UTF-8").should == "ちfoofoo"
end
encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: -> c { "bar" })
encoded.should == "\u3061foofoo".encode("UTF-16LE")
encoded.encode("UTF-8").should == "ちfoofoo"
end

it "replaces undefined encoding in destination with default replacement" do
NATFIXME 'encode options' do
encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace)
encoded.should == "B?".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "B?"
end
encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace)
encoded.should == "B?".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "B?"
end

it "replaces undefined encoding in destination with a specified replacement" do
NATFIXME 'encode options' do
encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo")
encoded.should == "Bfoo".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "Bfoo"
end
encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo")
encoded.should == "Bfoo".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "Bfoo"
end

it "replaces undefined encoding in destination with a specified replacement even if a fallback is given" do
NATFIXME 'encode options' do
encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo", fallback: proc {|x| "bar"})
encoded.should == "Bfoo".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "Bfoo"
end
encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo", fallback: proc {|x| "bar"})
encoded.should == "Bfoo".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "Bfoo"
end

it "replaces undefined encoding in destination using a fallback proc" do
NATFIXME 'encode fallback' do
encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc {|x| "bar"})
encoded.should == "Bbar".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "Bbar"
end
encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc {|x| "bar"})
encoded.should == "Bbar".encode(Encoding::US_ASCII)
encoded.encode("UTF-8").should == "Bbar"
end

it "replaces invalid encoding in source using replace even when fallback is given as proc" do
NATFIXME 'encode options' do
encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: proc {|x| "bar"})
encoded.should == "\u3061foofoo".encode("UTF-16LE")
encoded.encode("UTF-8").should == "ちfoofoo"
end
encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: proc {|x| "bar"})
encoded.should == "\u3061foofoo".encode("UTF-16LE")
encoded.encode("UTF-8").should == "ちfoofoo"
end
end

describe "when passed to, from" do
it "returns a copy in the destination encoding when both encodings are the same" do
NATFIXME 'src encoding' do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /from ASCII-8BIT to UTF-8/ do
str = "あ".dup.force_encoding("binary")
encoded = str.encode("utf-8", "utf-8")

Expand All @@ -159,7 +147,7 @@
end

it "returns the transcoded string" do
NATFIXME 'not sure' do
NATFIXME 'honor source encoding', exception: SpecFailedException, message: /should be ==/ do
str = "\x00\x00\x00\x1F"
str.encode(Encoding::UTF_8, Encoding::UTF_16BE).should == "\u0000\u001f"
end
Expand All @@ -168,12 +156,10 @@

describe "when passed to, options" do
it "returns a copy when the destination encoding is the same as the String encoding" do
NATFIXME 'encode options' do
str = "あ"
encoded = str.encode(Encoding::UTF_8, undef: :replace)
encoded.should_not equal(str)
encoded.should == str
end
str = "あ"
encoded = str.encode(Encoding::UTF_8, undef: :replace)
encoded.should_not equal(str)
encoded.should == str
end
end

Expand All @@ -186,7 +172,7 @@
end

it "returns a copy in the destination encoding when both encodings are the same" do
NATFIXME 'encode options' do
NATFIXME 'honor source encoding', exception: Encoding::UndefinedConversionError, message: /from ASCII-8BIT to UTF-8/ do
str = "あ".dup.force_encoding("binary")
encoded = str.encode("utf-8", "utf-8", invalid: :replace)

Expand Down
Loading

0 comments on commit a1dbb3b

Please sign in to comment.