Skip to content

Commit

Permalink
Move BOM stuff to its own namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
RauliL committed Oct 16, 2024
1 parent 3acbc8b commit 23238fd
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 43 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.6)

PROJECT(
PeeloUnicode
VERSION 1.1.0
VERSION 2.0.0
DESCRIPTION "Header only C++ Unicode utilities."
HOMEPAGE_URL "https://github.com/peelonet/peelo-unicode"
LANGUAGES CXX
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ main()
length = f.gcount();
f.close();

if (const auto bom = peelo::unicode::detect_bom(buffer, length))
if (const auto bom = peelo::unicode::bom::detect(buffer, length))
{
if (*bom == peelo::unicode::bom::utf16_be)
if (*bom == peelo::unicode::bom::type::utf16_be)
{
std::cout << "File has UTF-16BE BOM." << std::endl;
} else {
Expand Down
38 changes: 19 additions & 19 deletions include/peelo/unicode/bom.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@
#include <optional>
#include <string>

namespace peelo::unicode
namespace peelo::unicode::bom
{
/**
* Enumeration of different recognized BOM types.
*/
enum class bom
enum class type
{
utf8,
utf16_be,
Expand All @@ -59,72 +59,72 @@ namespace peelo::unicode
* @return Byte order mark detected in the given byte string, or an empty
* optional if the given byte string does not begin with a byte order mark.
*/
inline std::optional<bom>
detect_bom(const char* input, std::size_t length)
inline std::optional<type>
detect(const char* input, std::size_t length)
{
struct bom_info
{
const char* bytes;
std::size_t length;
bom type;
enum type type;
};
static constexpr std::size_t bom_array_size = 11;
static const std::array<bom_info, bom_array_size> bom_array =
{{
{
"\xef\xbb\xbf",
3,
bom::utf8,
type::utf8,
},
{
"\0\0\xfe\xff",
4,
bom::utf32_be,
type::utf32_be,
},
{
"\xff\xfe\0\0",
4,
bom::utf32_le,
type::utf32_le,
},
{
"\xfe\xff",
2,
bom::utf16_be,
type::utf16_be,
},
{
"\xff\xfe",
2,
bom::utf16_le,
type::utf16_le,
},
{
"\x2b\x2f\x76",
3,
bom::utf7,
type::utf7,
},
{
"\xf7\x64\x4c",
3,
bom::utf1,
type::utf1,
},
{
"\xdd\x73\x66\x73",
4,
bom::utf_ebcdic
type::utf_ebcdic
},
{
"\x0e\xfe\xff",
3,
bom::scsu
type::scsu
},
{
"\xfb\xee\x28",
3,
bom::bocu_1
type::bocu_1
},
{
"\x84\x31\x95\x33",
4,
bom::gb18030
type::gb18030
},
}};

Expand Down Expand Up @@ -152,9 +152,9 @@ namespace peelo::unicode
* @return Byte order mark detected in the given byte string, or an empty
* optional if the given byte string does not begin with a byte order mark.
*/
inline std::optional<bom>
detect_bom(const std::string& input)
inline std::optional<type>
detect(const std::string& input)
{
return detect_bom(input.c_str(), input.length());
return detect(input.c_str(), input.length());
}
}
41 changes: 20 additions & 21 deletions test/test_bom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@
# pragma warning( disable : 4100 )
#endif

using peelo::unicode::detect_bom;
using peelo::unicode::bom;
using namespace peelo::unicode::bom;

static void
test_recognized_bom(
bom expected_type,
type expected_type,
const char* input,
std::size_t length
)
{
// This looks weird but in GitHub CI I get warnings about unused variables if
// I do this in some other way.
if (const auto result = detect_bom(input, length))
if (const auto result = detect(input, length))
{
assert(*result == expected_type);
} else {
Expand All @@ -29,83 +28,83 @@ test_recognized_bom(
static void
test_utf8()
{
test_recognized_bom(bom::utf8, "\xef\xbb\xbf", 3);
test_recognized_bom(type::utf8, "\xef\xbb\xbf", 3);
}

static void
test_utf16_be()
{
test_recognized_bom(bom::utf16_be, "\xfe\xff", 2);
test_recognized_bom(type::utf16_be, "\xfe\xff", 2);
}

static void
test_utf16_le()
{
test_recognized_bom(bom::utf16_le, "\xff\xfe", 2);
test_recognized_bom(type::utf16_le, "\xff\xfe", 2);
}

static void
test_utf32_be()
{
test_recognized_bom(bom::utf32_be, "\0\0\xfe\xff", 4);
test_recognized_bom(type::utf32_be, "\0\0\xfe\xff", 4);
}

static void
test_utf32_le()
{
test_recognized_bom(bom::utf32_le, "\xff\xfe\0\0", 4);
test_recognized_bom(type::utf32_le, "\xff\xfe\0\0", 4);
}

static void
test_utf7()
{
test_recognized_bom(bom::utf7, "\x2b\x2f\x76", 3);
test_recognized_bom(type::utf7, "\x2b\x2f\x76", 3);
}

static void
test_utf1()
{
test_recognized_bom(bom::utf1, "\xf7\x64\x4c", 3);
test_recognized_bom(type::utf1, "\xf7\x64\x4c", 3);
}

static void
test_utf_ebcdic()
{
test_recognized_bom(bom::utf_ebcdic, "\xdd\x73\x66\x73", 4);
test_recognized_bom(type::utf_ebcdic, "\xdd\x73\x66\x73", 4);
}

static void
test_scsu()
{
test_recognized_bom(bom::scsu, "\x0e\xfe\xff", 3);
test_recognized_bom(type::scsu, "\x0e\xfe\xff", 3);
}

static void
test_bocu_1()
{
test_recognized_bom(bom::bocu_1, "\xfb\xee\x28", 3);
test_recognized_bom(type::bocu_1, "\xfb\xee\x28", 3);
}

static void
test_gb18030()
{
test_recognized_bom(bom::gb18030, "\x84\x31\x95\x33", 4);
test_recognized_bom(type::gb18030, "\x84\x31\x95\x33", 4);
}

static void
test_unrecognized_bom()
{
assert(!detect_bom("", 0));
assert(!detect_bom("a", 1));
assert(!detect_bom("a\xef\xbb\xbf", 4));
assert(!detect_bom("\x00\xbb\xbf\xef\xbb\xbf", 6));
assert(!detect("", 0));
assert(!detect("a", 1));
assert(!detect("a\xef\xbb\xbf", 4));
assert(!detect("\x00\xbb\xbf\xef\xbb\xbf", 6));
}

static void
test_with_string()
{
assert(!!detect_bom(std::string("\xef\xbb\xbf")));
assert(!detect_bom(std::string("a")));
assert(!!detect(std::string("\xef\xbb\xbf")));
assert(!detect(std::string("a")));
}

int
Expand Down

0 comments on commit 23238fd

Please sign in to comment.