Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add :xml option to String#encode #2155

Merged
merged 1 commit into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions include/natalie/encoding_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,17 @@ class EncodingObject : public Object {
Universal,
};

// Selects the XML escaping mode applied by EncodingObject::encode().
// In both Attr and Text modes, a character that has no mapping in the
// destination encoding is written as an upper-case hexadecimal character
// reference (&#xHH;) when undef_option is Raise.
enum class EncodeXmlOption {
    // No XML escaping is performed.
    None = 0,
    // '&', '<', '>' and '"' become &amp;, &lt;, &gt; and &quot;, and the
    // whole result is wrapped in double quotes.
    Attr = 1,
    // '&', '<' and '>' become &amp;, &lt; and &gt;; '"' is left untouched.
    Text = 2,
};

// Settings consulted by EncodingObject::encode(). Every member has a default,
// so a default-constructed EncodeOptions selects Raise for invalid/undefined
// input and performs no newline or XML conversion.
struct EncodeOptions {
// NOTE(review): presumably governs invalid byte sequences in the source
// string; its use is not visible in this excerpt -- confirm in encode().
EncodeInvalidOption invalid_option = EncodeInvalidOption::Raise;
// Consulted when a character has no codepoint in the destination encoding.
EncodeUndefOption undef_option = EncodeUndefOption::Raise;
// NOTE(review): newline conversion mode; handling is outside this excerpt.
EncodeNewlineOption newline_option = EncodeNewlineOption::None;
// XML escaping mode; populated from the `xml:` keyword (:attr or :text).
EncodeXmlOption xml_option = EncodeXmlOption::None;
// NOTE(review): presumably the replacement string for the Replace modes;
// nullptr when not supplied -- confirm against encode().
StringObject *replace_option = nullptr;
// Populated from a non-nil `fallback:` keyword. When set, encode() hands
// undefined codepoints to it instead of raising; xml_option is checked first.
Value fallback_option = nullptr;
};
Expand Down
1 change: 1 addition & 0 deletions include/natalie/string_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ class StringObject : public Object {
using EncodeOptions = EncodingObject::EncodeOptions;
using EncodeInvalidOption = EncodingObject::EncodeInvalidOption;
using EncodeNewlineOption = EncodingObject::EncodeNewlineOption;
using EncodeXmlOption = EncodingObject::EncodeXmlOption;
using EncodeUndefOption = EncodingObject::EncodeUndefOption;

String m_string {};
Expand Down
44 changes: 11 additions & 33 deletions spec/core/string/shared/encode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -394,71 +394,49 @@ def replace_to_s(c)

# With xml: :text, String#encode escapes the XML text metacharacters
# ('&', '<', '>') and leaves double quotes alone. Characters that cannot be
# represented in the destination encoding become numeric character references.
describe "given the xml: :text option" do
  it "replaces all instances of '&' with '&amp;'" do
    '& and &'.send(@method, "UTF-8", xml: :text).should == '&amp; and &amp;'
  end

  it "replaces all instances of '<' with '&lt;'" do
    '< and <'.send(@method, "UTF-8", xml: :text).should == '&lt; and &lt;'
  end

  it "replaces all instances of '>' with '&gt;'" do
    '> and >'.send(@method, "UTF-8", xml: :text).should == '&gt; and &gt;'
  end

  it "does not replace '\"'" do
    '" and "'.send(@method, "UTF-8", xml: :text).should == '" and "'
  end

  it "replaces undefined characters with their upper-case hexadecimal numeric character references" do
    'ürst'.send(@method, Encoding::US_ASCII, xml: :text).should == '&#xFC;rst'
  end
end

# With xml: :attr, String#encode produces a quoted XML attribute value: the
# result is wrapped in double quotes and '&', '<', '>' and '"' are escaped.
# Characters that cannot be represented in the destination encoding become
# numeric character references.
describe "given the xml: :attr option" do
  it "surrounds the encoded text with double-quotes" do
    'abc'.send(@method, "UTF-8", xml: :attr).should == '"abc"'
  end

  it "replaces all instances of '&' with '&amp;'" do
    '& and &'.send(@method, "UTF-8", xml: :attr).should == '"&amp; and &amp;"'
  end

  it "replaces all instances of '<' with '&lt;'" do
    '< and <'.send(@method, "UTF-8", xml: :attr).should == '"&lt; and &lt;"'
  end

  it "replaces all instances of '>' with '&gt;'" do
    '> and >'.send(@method, "UTF-8", xml: :attr).should == '"&gt; and &gt;"'
  end

  it "replaces all instances of '\"' with '&quot;'" do
    '" and "'.send(@method, "UTF-8", xml: :attr).should == '"&quot; and &quot;"'
  end

  it "replaces undefined characters with their upper-case hexadecimal numeric character references" do
    'ürst'.send(@method, Encoding::US_ASCII, xml: :attr).should == '"&#xFC;rst"'
  end
end

Expand Down
56 changes: 55 additions & 1 deletion src/encoding_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje
if (orig_encoding->num() == Encoding::ASCII_8BIT && num() == Encoding::ASCII_8BIT)
return str;

StringObject temp_string = StringObject("", (EncodingObject *)this);
ClassObject *EncodingClass = find_top_level_const(env, "Encoding"_s)->as_class();
StringObject temp_string = StringObject("", (EncodingObject *)this);

if (options.xml_option == EncodeXmlOption::Attr)
temp_string.append_char('"');

size_t index = 0;
auto string = str->string();
Expand Down Expand Up @@ -48,6 +51,43 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje
break;
}

switch (options.xml_option) {
case EncodeXmlOption::None:
break;
case EncodeXmlOption::Attr:
switch (c) {
case '&':
temp_string.append("&amp;");
continue;
case '<':
temp_string.append("&lt;");
continue;
case '>':
temp_string.append("&gt;");
continue;
case '"':
temp_string.append("&quot;");
continue;
default:
break;
}
break;
case EncodeXmlOption::Text:
switch (c) {
case '&':
temp_string.append("&amp;");
continue;
case '<':
temp_string.append("&lt;");
continue;
case '>':
temp_string.append("&gt;");
continue;
default:
break;
}
}

auto handle_fallback = [&](nat_int_t cpt) {
auto ch = new StringObject { orig_encoding->encode_codepoint(cpt) };
Value result = NilObject::the();
Expand Down Expand Up @@ -124,10 +164,21 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje
if (destination_codepoint < 0) {
switch (options.undef_option) {
case EncodeUndefOption::Raise:
switch (options.xml_option) {
case EncodeXmlOption::None:
break;
case EncodeXmlOption::Attr:
case EncodeXmlOption::Text:
auto entity = String::format("&#x{};", String::hex(unicode_codepoint, String::HexFormat::Uppercase));
temp_string.append(entity);
continue;
}

if (options.fallback_option) {
handle_fallback(unicode_codepoint);
continue;
}

StringObject *message;
if (orig_encoding->num() != Encoding::UTF_8)
message = StringObject::format(
Expand Down Expand Up @@ -160,6 +211,9 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje
temp_string.append(destination_char_obj);
}

if (options.xml_option == EncodeXmlOption::Attr)
temp_string.append_char('"');

str->set_str(temp_string.string().c_str(), temp_string.string().length());
str->set_encoding(EncodingObject::get(num()));
return str;
Expand Down
10 changes: 10 additions & 0 deletions src/string_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,16 @@ Value StringObject::encode_in_place(Env *env, Value dst_encoding, Value src_enco
auto fallback = kwargs->remove(env, "fallback"_s);
if (fallback && !fallback->is_nil())
options.fallback_option = fallback;

auto xml = kwargs->remove(env, "xml"_s);
if (xml) {
if (xml == "attr"_s)
options.xml_option = EncodeXmlOption::Attr;
else if (xml == "text"_s)
options.xml_option = EncodeXmlOption::Text;
else
env->raise("ArgumentError", "unexpected value for xml option: {}", xml->inspect_str(env));
}
}

auto find_encoding = [&](Value encoding) {
Expand Down
Loading