From 9b221ae0918d90288b5f9a852827f2a9cbc57bf4 Mon Sep 17 00:00:00 2001 From: Tim Morgan Date: Thu, 27 Jun 2024 07:32:20 -0500 Subject: [PATCH] Add :xml option to String#encode --- include/natalie/encoding_object.hpp | 7 ++++ include/natalie/string_object.hpp | 1 + spec/core/string/shared/encode.rb | 44 ++++++----------------- src/encoding_object.cpp | 56 ++++++++++++++++++++++++++++- src/string_object.cpp | 10 ++++++ 5 files changed, 84 insertions(+), 34 deletions(-) diff --git a/include/natalie/encoding_object.hpp b/include/natalie/encoding_object.hpp index 97b8ed0e3..6e988af39 100644 --- a/include/natalie/encoding_object.hpp +++ b/include/natalie/encoding_object.hpp @@ -96,10 +96,17 @@ class EncodingObject : public Object { Universal, }; + enum class EncodeXmlOption { /* XML escaping mode used by encode(): None = no escaping, Attr = escape & < > " and wrap the result in double quotes, Text = escape & < > only */ + None, + Attr, + Text, + }; + struct EncodeOptions { EncodeInvalidOption invalid_option = EncodeInvalidOption::Raise; EncodeUndefOption undef_option = EncodeUndefOption::Raise; EncodeNewlineOption newline_option = EncodeNewlineOption::None; + EncodeXmlOption xml_option = EncodeXmlOption::None; /* populated from the xml: keyword argument in StringObject::encode_in_place */ StringObject *replace_option = nullptr; Value fallback_option = nullptr; }; diff --git a/include/natalie/string_object.hpp b/include/natalie/string_object.hpp index 7dd434524..4edb677eb 100644 --- a/include/natalie/string_object.hpp +++ b/include/natalie/string_object.hpp @@ -511,6 +511,7 @@ class StringObject : public Object { using EncodeOptions = EncodingObject::EncodeOptions; using EncodeInvalidOption = EncodingObject::EncodeInvalidOption; using EncodeNewlineOption = EncodingObject::EncodeNewlineOption; + using EncodeXmlOption = EncodingObject::EncodeXmlOption; using EncodeUndefOption = EncodingObject::EncodeUndefOption; String m_string {}; diff --git a/spec/core/string/shared/encode.rb b/spec/core/string/shared/encode.rb index daa964cc2..e87f37608 100644 --- a/spec/core/string/shared/encode.rb +++ b/spec/core/string/shared/encode.rb @@ -394,71 +394,49 @@ def replace_to_s(c) describe "given the xml: :text 
option" do it "replaces all instances of '&' with '&amp;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '& and &'.send(@method, "UTF-8", xml: :text).should == '&amp; and &amp;' - end + '& and &'.send(@method, "UTF-8", xml: :text).should == '&amp; and &amp;' end it "replaces all instances of '<' with '&lt;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '< and <'.send(@method, "UTF-8", xml: :text).should == '&lt; and &lt;' - end + '< and <'.send(@method, "UTF-8", xml: :text).should == '&lt; and &lt;' end it "replaces all instances of '>' with '&gt;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '> and >'.send(@method, "UTF-8", xml: :text).should == '&gt; and &gt;' - end + '> and >'.send(@method, "UTF-8", xml: :text).should == '&gt; and &gt;' end it "does not replace '\"'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '" and "'.send(@method, "UTF-8", xml: :text).should == '" and "' - end + '" and "'.send(@method, "UTF-8", xml: :text).should == '" and "' end it "replaces undefined characters with their upper-case hexadecimal numeric character references" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - 'ürst'.send(@method, Encoding::US_ASCII, xml: :text).should == '&#xFC;rst' - end + 'ürst'.send(@method, Encoding::US_ASCII, xml: :text).should == '&#xFC;rst' end end describe "given the xml: :attr option" do it "surrounds the encoded text with double-quotes" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - 'abc'.send(@method, "UTF-8", xml: :attr).should == '"abc"' - end + 'abc'.send(@method, "UTF-8", xml: :attr).should == '"abc"' end it "replaces all instances of '&' with '&amp;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '& and &'.send(@method, "UTF-8", xml: :attr).should == '"&amp; and &amp;"' - end + '& and 
&'.send(@method, "UTF-8", xml: :attr).should == '"&amp; and &amp;"' end it "replaces all instances of '<' with '&lt;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '< and <'.send(@method, "UTF-8", xml: :attr).should == '"&lt; and &lt;"' - end + '< and <'.send(@method, "UTF-8", xml: :attr).should == '"&lt; and &lt;"' end it "replaces all instances of '>' with '&gt;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '> and >'.send(@method, "UTF-8", xml: :attr).should == '"&gt; and &gt;"' - end + '> and >'.send(@method, "UTF-8", xml: :attr).should == '"&gt; and &gt;"' end it "replaces all instances of '\"' with '&quot;'" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - '" and "'.send(@method, "UTF-8", xml: :attr).should == '"&quot; and &quot;"' - end + '" and "'.send(@method, "UTF-8", xml: :attr).should == '"&quot; and &quot;"' end it "replaces undefined characters with their upper-case hexadecimal numeric character references" do - NATFIXME 'xml option', exception: ArgumentError, message: 'unknown keyword: :xml' do - 'ürst'.send(@method, Encoding::US_ASCII, xml: :attr).should == '"&#xFC;rst"' - end + 'ürst'.send(@method, Encoding::US_ASCII, xml: :attr).should == '"&#xFC;rst"' end end diff --git a/src/encoding_object.cpp b/src/encoding_object.cpp index b3b16de72..62f920827 100644 --- a/src/encoding_object.cpp +++ b/src/encoding_object.cpp @@ -12,8 +12,11 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje if (orig_encoding->num() == Encoding::ASCII_8BIT && num() == Encoding::ASCII_8BIT) return str; - StringObject temp_string = StringObject("", (EncodingObject *)this); ClassObject *EncodingClass = find_top_level_const(env, "Encoding"_s)->as_class(); + StringObject temp_string = StringObject("", (EncodingObject *)this); + + if (options.xml_option == EncodeXmlOption::Attr) /* xml: :attr output is wrapped in double quotes; this emits the opening quote */ + temp_string.append_char('"'); size_t index = 0; auto string = str->string(); @@ -48,6 +51,43 @@ Value 
EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje break; } + switch (options.xml_option) { /* escape XML markup characters as named entities; Attr additionally escapes the double quote */ + case EncodeXmlOption::None: + break; + case EncodeXmlOption::Attr: + switch (c) { + case '&': + temp_string.append("&amp;"); + continue; + case '<': + temp_string.append("&lt;"); + continue; + case '>': + temp_string.append("&gt;"); + continue; + case '"': + temp_string.append("&quot;"); + continue; + default: + break; + } + break; + case EncodeXmlOption::Text: + switch (c) { + case '&': + temp_string.append("&amp;"); + continue; + case '<': + temp_string.append("&lt;"); + continue; + case '>': + temp_string.append("&gt;"); + continue; + default: + break; + } + } + auto handle_fallback = [&](nat_int_t cpt) { auto ch = new StringObject { orig_encoding->encode_codepoint(cpt) }; Value result = NilObject::the(); @@ -124,10 +164,21 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje if (destination_codepoint < 0) { switch (options.undef_option) { case EncodeUndefOption::Raise: + switch (options.xml_option) { + case EncodeXmlOption::None: + break; + case EncodeXmlOption::Attr: + case EncodeXmlOption::Text: /* with an xml mode set, a character missing from the destination encoding becomes an uppercase hex character reference instead of raising */ + auto entity = String::format("&#x{};", String::hex(unicode_codepoint, String::HexFormat::Uppercase)); + temp_string.append(entity); + continue; + } + if (options.fallback_option) { handle_fallback(unicode_codepoint); continue; } + StringObject *message; if (orig_encoding->num() != Encoding::UTF_8) message = StringObject::format( @@ -160,6 +211,9 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje temp_string.append(destination_char_obj); } + if (options.xml_option == EncodeXmlOption::Attr) /* closing quote matching the one emitted before the loop */ + temp_string.append_char('"'); + str->set_str(temp_string.string().c_str(), temp_string.string().length()); str->set_encoding(EncodingObject::get(num())); return str; diff --git a/src/string_object.cpp b/src/string_object.cpp index 1117132e3..cf7d031c3 100644 --- a/src/string_object.cpp +++ b/src/string_object.cpp @@ -1172,6 
+1172,16 @@ Value StringObject::encode_in_place(Env *env, Value dst_encoding, Value src_enco auto fallback = kwargs->remove(env, "fallback"_s); if (fallback && !fallback->is_nil()) options.fallback_option = fallback; + + auto xml = kwargs->remove(env, "xml"_s); + if (xml && !xml->is_nil()) { /* xml: nil means the option was not given, same as the fallback check above; only :attr and :text are accepted */ + if (xml == "attr"_s) + options.xml_option = EncodeXmlOption::Attr; + else if (xml == "text"_s) + options.xml_option = EncodeXmlOption::Text; + else + env->raise("ArgumentError", "unexpected value for xml option: {}", xml->inspect_str(env)); + } } auto find_encoding = [&](Value encoding) {