Skip to content

Commit

Permalink
Remove leading spaces from html attributes when read
Browse files Browse the repository at this point in the history
Add pointer check
  • Loading branch information
Blake-Madden committed May 31, 2024
1 parent 4775cbd commit 239f462
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
9 changes: 9 additions & 0 deletions src/import/html_extract_text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,11 @@ namespace lily_of_the_valley
{
std::advance(foundTag, 1);
}
// step over spaces after quote
while (foundTag && foundTag < elementEnd && *foundTag == L' ')
{
std::advance(foundTag, 1);
}
if (foundTag >= elementEnd)
{
return std::make_pair(nullptr, 0);
Expand Down Expand Up @@ -2493,6 +2498,10 @@ namespace html_utilities
const size_t length) noexcept
: m_html_text(html_text), m_html_text_end(html_text + length)
{
if (html_text == nullptr)
{
return;
}
// see if there is a base url that should be used as an alternative that the client should
// use instead
const wchar_t* headStart = string_util::stristr<wchar_t>(m_html_text, L"<head");
Expand Down
17 changes: 15 additions & 2 deletions tests/htmlimporttests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,11 @@ TEST_CASE("HTML Parser", "[html import]")
CHECK(275 == html_extract_text::read_attribute_as_long(text, L"height", false));
CHECK(std::wstring(L"") == html_extract_text::read_attribute_as_string(text, L"style", false, false));
CHECK(std::wstring(L"") == html_extract_text::read_attribute_as_string(text, L"info", false, false));
CHECK(std::wstring(L" ") == html_extract_text::read_attribute_as_string(text, L"info", false, true));
CHECK(std::wstring(L"") == html_extract_text::read_attribute_as_string(text, L"info", false, true));

text = L"body style =\"\" info ='num value' height=275>there<br />world<br >!";
CHECK(std::wstring(L"num") == html_extract_text::read_attribute_as_string(text, L"info", false, false));
CHECK(std::wstring(L"num value") == html_extract_text::read_attribute_as_string(text, L"info", false, true));
}
SECTION("Read Tag Quotable")
{
Expand Down Expand Up @@ -1391,6 +1395,15 @@ TEST_CASE("Hyperlink Parser", "[html import]")
CHECK(std::wcsncmp(parse(), L"page.htm", 8) == 0);
CHECK(parse() == nullptr);
}

// Verifies that a hyperlink whose quoted href value begins with a space
// is returned without that leading space.
SECTION("Leading space")
{
// note the space between the opening quote and "https" in the href value
const wchar_t* text = LR"(<a href=" https://depauwtigers.com/landing/index" target="_blank">Athletics</a>)";
html_hyperlink_parse parse(text, std::wcslen(text) );

// the first call should yield the URL with the leading space stepped over
CHECK(std::wstring{ parse(), parse.get_current_hyperlink_length() } == std::wstring{ L"https://depauwtigers.com/landing/index" });
// the input holds a single anchor, so the next call is expected to return nullptr
CHECK(parse() == nullptr);
}

SECTION("Hyperlink")
{
Expand Down Expand Up @@ -1619,7 +1632,7 @@ TEST_CASE("Html Url Format", "[html import]")
html_url_format formatHtml(L"http://business.mypage.com/blah/blah.html");
CHECK(formatHtml.get_directory_path() == L"business.mypage.com/blah");
}
SECTION("NoProtocal")
SECTION("No Protocal")
{
html_url_format formatHtml(L"www.mypage.com");
const wchar_t* p = formatHtml({ L"page.html", 9 }, false);
Expand Down

0 comments on commit 239f462

Please sign in to comment.