Skip to content

Commit

Permalink
migrate away from std::wstring_convert
Browse files Browse the repository at this point in the history
  • Loading branch information
parmsam committed Apr 10, 2024
1 parent 5c1e502 commit ce1e5f8
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 56 deletions.
16 changes: 8 additions & 8 deletions R/cpp11.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Generated by cpp11: do not edit by hand

compressToEncodedURIComponent_ <- function(uncompressed8) {
.Call(`_lzstringr_compressToEncodedURIComponent_`, uncompressed8)
compressToEncodedURIComponent_ <- function(bytes) {
.Call(`_lzstringr_compressToEncodedURIComponent_`, bytes)
}

decompressFromEncodedURIComponent_ <- function(compressed8) {
.Call(`_lzstringr_decompressFromEncodedURIComponent_`, compressed8)
decompressFromEncodedURIComponent_ <- function(bytes) {
.Call(`_lzstringr_decompressFromEncodedURIComponent_`, bytes)
}

compressToBase64_ <- function(uncompressed8) {
.Call(`_lzstringr_compressToBase64_`, uncompressed8)
compressToBase64_ <- function(bytes) {
.Call(`_lzstringr_compressToBase64_`, bytes)
}

decompressFromBase64_ <- function(compressed8) {
.Call(`_lzstringr_decompressFromBase64_`, compressed8)
decompressFromBase64_ <- function(bytes) {
.Call(`_lzstringr_decompressFromBase64_`, bytes)
}
28 changes: 20 additions & 8 deletions R/lzstringr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,38 @@
## usethis namespace: end
NULL

# Internal helper: run a compression routine `f` on `string`.
#
# `string` is normalised to UTF-8 and then re-encoded as a raw UTF-16 byte
# vector (only the first element of the iconv() result is used), which is
# what `f` receives. Whatever `f` returns is coerced to raw bytes and
# reassembled into a single character string.
safe_compress <- function(string, f) {
  utf16_bytes <- iconv(
    enc2utf8(string),
    from = "UTF-8",
    to = "UTF-16",
    toRaw = TRUE
  )[[1]]
  compressed <- f(utf16_bytes)
  rawToChar(as.raw(compressed))
}

# Internal helper: run a decompression routine `f` on `string`.
#
# `string` is normalised to UTF-8 and then re-encoded as a raw UTF-16 byte
# vector (only the first element of the iconv() result is used), which is
# what `f` receives. `f` is expected to return the decompressed text as a
# vector of UTF-16 code units; these are converted back into a single
# UTF-8 character string.
safe_decompress <- function(string, f) {
  string <- enc2utf8(string)
  string <- iconv(string, from = "UTF-8", to = "UTF-16", toRaw = TRUE)[[1]]
  result <- f(string)
  # The values are UTF-16 code units, not code points: characters outside
  # the Basic Multilingual Plane arrive as surrogate pairs. Without
  # allow_surrogate_pairs = TRUE, intToUtf8() does not recombine them and
  # the original text is lost.
  intToUtf8(result, allow_surrogate_pairs = TRUE)
}

#' @export compressToBase64
compressToBase64 <- function(string) {
string <- enc2utf8(string)
compressToBase64_(string)
safe_compress(string, compressToBase64_)
}

#' @export decompressFromBase64
decompressFromBase64 <- function(string) {
string <- enc2utf8(string)
decompressFromBase64_(string)
safe_decompress(string, decompressFromBase64_)
}

#' @export compressToEncodedURIComponent
compressToEncodedURIComponent <- function(string) {
string <- enc2utf8(string)
compressToEncodedURIComponent_(string)
safe_compress(string, compressToEncodedURIComponent_)
}

#' @export decompressFromEncodedURIComponent
decompressFromEncodedURIComponent <- function(string) {
string <- enc2utf8(string)
decompressFromEncodedURIComponent_(string)
safe_decompress(string, decompressFromEncodedURIComponent_)
}
70 changes: 42 additions & 28 deletions src/code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,70 @@ using namespace cpp11;
#include <codecvt>
#include "lz-string.hpp"
#include <string>
#include <vector>
#include <iostream>

/// Decode a BOM-prefixed UTF-16 byte sequence into a std::u16string.
///
/// @param bytes Raw bytes that start with a UTF-16 byte order mark
///              (FF FE = little-endian, FE FF = big-endian) followed by
///              zero or more 2-byte code units in that byte order.
/// @return The code units that follow the BOM; the BOM itself is dropped.
/// @throws std::runtime_error if fewer than 2 bytes are supplied, if the
///         first two bytes are not a UTF-16 BOM, or if the byte count is
///         odd (a dangling half code unit).
std::u16string createUTF16String(const std::vector<unsigned char>& bytes) {
    if (bytes.size() < 2) {
        throw std::runtime_error("Invalid byte array. Size must be at least 2 bytes.");
    }

    // Check byte order mark (BOM)
    const bool isLittleEndian = (bytes[0] == 0xFF && bytes[1] == 0xFE);
    const bool isBigEndian = (bytes[0] == 0xFE && bytes[1] == 0xFF);

    if (!isLittleEndian && !isBigEndian) {
        throw std::runtime_error("Invalid byte order mark (BOM).");
    }

    // Every code unit needs two bytes. Reject a trailing single byte instead
    // of reading one element past the end of the vector.
    if (bytes.size() % 2 != 0) {
        throw std::runtime_error("Invalid byte array. Size must be an even number of bytes.");
    }

    // Position of the low/high byte inside each 2-byte unit, fixed by the BOM.
    const std::size_t lowOffset = isLittleEndian ? 0 : 1;
    const std::size_t highOffset = 1 - lowOffset;

    std::u16string result;
    result.reserve((bytes.size() - 2) / 2);
    for (std::size_t i = 2; i < bytes.size(); i += 2) {
        result.push_back(static_cast<char16_t>(
            bytes[i + lowOffset] | (bytes[i + highOffset] << 8)));
    }

    return result;
}


[[cpp11::register]]
std::string compressToEncodedURIComponent_(std::string uncompressed8) {
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_8_to_16;
std::u16string uncompressed16 = converter_8_to_16.from_bytes(uncompressed8);
std::u16string compressToEncodedURIComponent_(std::vector<unsigned char> bytes) {
std::u16string uncompressed16 = createUTF16String(bytes);

auto compressed16 = lzstring::compressToEncodedURIComponent(uncompressed16);

std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_16_to_8;
std::string compressed8 = converter_16_to_8.to_bytes(compressed16);

return compressed8;
return compressed16;
}


[[cpp11::register]]
std::string decompressFromEncodedURIComponent_(std::string compressed8) {
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_8_to_16;
std::u16string compressed16 = converter_8_to_16.from_bytes(compressed8);
std::u16string decompressFromEncodedURIComponent_(std::vector<unsigned char> bytes) {
std::u16string compressed16 = createUTF16String(bytes);

auto uncompressed16 = lzstring::decompressFromEncodedURIComponent(compressed16);

std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_16_to_8;
std::string uncompressed8 = converter_16_to_8.to_bytes(uncompressed16);

return uncompressed8;
return uncompressed16;
}

[[cpp11::register]]
std::string compressToBase64_(std::string uncompressed8) {
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_8_to_16;
std::u16string uncompressed16 = converter_8_to_16.from_bytes(uncompressed8);
std::u16string compressToBase64_(std::vector<unsigned char> bytes) {
std::u16string uncompressed16 = createUTF16String(bytes);

auto compressed16 = lzstring::compressToBase64(uncompressed16);

std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_16_to_8;
std::string compressed8 = converter_16_to_8.to_bytes(compressed16);

return compressed8;
return compressed16;
}

[[cpp11::register]]
std::string decompressFromBase64_(std::string compressed8) {
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_8_to_16;
std::u16string compressed16 = converter_8_to_16.from_bytes(compressed8);
std::u16string decompressFromBase64_(std::vector<unsigned char> bytes) {
std::u16string compressed16 = createUTF16String(bytes);

auto uncompressed16 = lzstring::decompressFromBase64(compressed16);

std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter_16_to_8;
std::string uncompressed8 = converter_16_to_8.to_bytes(uncompressed16);

return uncompressed8;
return uncompressed16;
}
24 changes: 12 additions & 12 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,31 @@
#include <R_ext/Visibility.h>

// code.cpp
std::string compressToEncodedURIComponent_(std::string uncompressed8);
extern "C" SEXP _lzstringr_compressToEncodedURIComponent_(SEXP uncompressed8) {
std::u16string compressToEncodedURIComponent_(std::vector<unsigned char> bytes);
extern "C" SEXP _lzstringr_compressToEncodedURIComponent_(SEXP bytes) {
BEGIN_CPP11
return cpp11::as_sexp(compressToEncodedURIComponent_(cpp11::as_cpp<cpp11::decay_t<std::string>>(uncompressed8)));
return cpp11::as_sexp(compressToEncodedURIComponent_(cpp11::as_cpp<cpp11::decay_t<std::vector<unsigned char>>>(bytes)));
END_CPP11
}
// code.cpp
std::string decompressFromEncodedURIComponent_(std::string compressed8);
extern "C" SEXP _lzstringr_decompressFromEncodedURIComponent_(SEXP compressed8) {
std::u16string decompressFromEncodedURIComponent_(std::vector<unsigned char> bytes);
extern "C" SEXP _lzstringr_decompressFromEncodedURIComponent_(SEXP bytes) {
BEGIN_CPP11
return cpp11::as_sexp(decompressFromEncodedURIComponent_(cpp11::as_cpp<cpp11::decay_t<std::string>>(compressed8)));
return cpp11::as_sexp(decompressFromEncodedURIComponent_(cpp11::as_cpp<cpp11::decay_t<std::vector<unsigned char>>>(bytes)));
END_CPP11
}
// code.cpp
std::string compressToBase64_(std::string uncompressed8);
extern "C" SEXP _lzstringr_compressToBase64_(SEXP uncompressed8) {
std::u16string compressToBase64_(std::vector<unsigned char> bytes);
extern "C" SEXP _lzstringr_compressToBase64_(SEXP bytes) {
BEGIN_CPP11
return cpp11::as_sexp(compressToBase64_(cpp11::as_cpp<cpp11::decay_t<std::string>>(uncompressed8)));
return cpp11::as_sexp(compressToBase64_(cpp11::as_cpp<cpp11::decay_t<std::vector<unsigned char>>>(bytes)));
END_CPP11
}
// code.cpp
std::string decompressFromBase64_(std::string compressed8);
extern "C" SEXP _lzstringr_decompressFromBase64_(SEXP compressed8) {
std::u16string decompressFromBase64_(std::vector<unsigned char> bytes);
extern "C" SEXP _lzstringr_decompressFromBase64_(SEXP bytes) {
BEGIN_CPP11
return cpp11::as_sexp(decompressFromBase64_(cpp11::as_cpp<cpp11::decay_t<std::string>>(compressed8)));
return cpp11::as_sexp(decompressFromBase64_(cpp11::as_cpp<cpp11::decay_t<std::vector<unsigned char>>>(bytes)));
END_CPP11
}

Expand Down

0 comments on commit ce1e5f8

Please sign in to comment.