Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
traversc committed Sep 9, 2024
1 parent 96de7c9 commit 1ae2878
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 47 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: qs2
Type: Package
Title: A Package for Efficient Serialization
Version: 0.1.0
Date: 2024-09-07
Date: 2024-09-09
Authors@R: c(
person("Travers", "Ching", email = "[email protected]", role = c("aut", "cre", "cph")),
person("Yann", "Collet", role = c("ctb", "cph"), comment = "Yann Collet is the author of the bundled zstd"),
Expand Down
3 changes: 1 addition & 2 deletions R/documentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ shared_params_save <- function(warn_unsupported_types=FALSE) {
'The maximum and minimum possible values depends on the version of ZSTD library used.',
'As of ZSTD 1.5.6 the maximum compression level is 22, and the minimum is -131072. Usually, values in the low positive range offer very good performance in terms',
'of speed and compression.',
'@param shuffle Whether to allow byte shuffling when compressing data (default: `TRUE`). See section *Byte shuffling* in the vignette or readme for details.',
# '@param store_checksum Whether to store the checksum in the saved object (Default `TRUE`) which is used to verify file integrity during read. There is a performance penalty when enabled and a checksum is not at all necessary so you may want to disable it.',
'@param shuffle Whether to allow byte shuffling when compressing data (default: `TRUE`).',
'@param warn_unsupported_types Whether to warn when saving an object with an unsupported type (default `TRUE`).'[warn_unsupported_types],
'@param nthreads The number of threads to use when compressing data (default: `1`).')
}
Expand Down
2 changes: 1 addition & 1 deletion man/qd_save.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/qs_save.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 0 additions & 20 deletions src/qd_deserializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,6 @@ struct QdataDeserializer {
}
}
public:
// template <typename T> void shuffle_get_data(T * buf, const uint64_t len, const uint64_t bytesoftype = sizeof(T)) {
// if( shuffle && (len > MIN_SHUFFLE_ARRAYSIZE) ) {
// if(len > shuffleblock.size()) shuffleblock.resize(len);
// reader.get_data( reinterpret_cast<char*>(shuffleblock.data()), len );
// blosc_unshuffle(reinterpret_cast<uint8_t *>(shuffleblock.data()), reinterpret_cast<uint8_t *>(buf), len, bytesoftype);
// } else {
// reader.get_data( reinterpret_cast<char*>(buf), len );
// }
// }

// object_length and attr_length should be pre-initialized to 0
void read_header(qstype & type, uint64_t & object_length, uint32_t & attr_length) {
Expand Down Expand Up @@ -216,17 +207,6 @@ struct QdataDeserializer {
}
}

// inline void read_string_UF_header(uint32_t & string_len) {
// uint8_t header_byte = reader.template get_pod<uint8_t>();
// if(header_byte < string_UF_max_8) {
// string_len = header_byte;
// } else if(header_byte == string_UF_header_NA) {
// string_len = NA_STRING_LENGTH;
// } else {
// string_len = reader.template get_pod_contiguous<uint32_t>();
// }
// }

void read_and_assign_attributes(SEXP object, const uint32_t attr_length) {
SEXP aptr = Rf_allocList(attr_length);
SET_ATTRIB(object, aptr); // assign immediately for protection
Expand Down
40 changes: 18 additions & 22 deletions tests/correctness_testing.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,7 @@ rand_strings <- function(N) {

# do not include functions as they do not evaluate to TRUE with identical(x, y)
random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
if (sample(3, 1) == 1) {
ret <- as.list(1:N)
} else if (sample(2, 1) == 1) {
ret <- as.pairlist(1:N)
} else {
ret <- as.pairlist(1:N)
}
ret <- as.list(1:N)
for (i in 1:N) {
if (get_obj_size() > get("max_size", envir = globalenv())) break;
otype <- sample(12, size = 1)
Expand All @@ -92,7 +86,7 @@ random_object_generator <- function(N, with_envs = FALSE) { # additional input:
else if (otype == 4) { z <- (sample(256, size = 1e4, replace = TRUE) - 1) %>% as.raw; set_obj_size(z); }
else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
else if (with_envs && otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
else { z <- random_object_generator(N, with_envs) }
if (is_attribute) {
Expand Down Expand Up @@ -370,22 +364,24 @@ for (q in 1:reps) {
}
cat("\n")

if(format == "qs2") {
time <- vector("numeric", length = 8)
for (i in 1:8) {
obj_size <- 0
x1 <- random_object_generator(12)
printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
time[i] <- Sys.time()
qs_save_rand(x1)
z <- qs_read_rand()
time[i] <- Sys.time() - time[i]
do_gc()
stopifnot(identical(z, x1))
time <- vector("numeric", length = 8)
for (i in 1:8) {
obj_size <- 0
if(format == "qs2") {
x1 <- random_object_generator(12, with_envs = TRUE)
} else { # qdata
x1 <- random_object_generator(12, with_envs = FALSE)
}
printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
cat("\n")
printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
time[i] <- Sys.time()
qs_save_rand(x1)
z <- qs_read_rand()
time[i] <- Sys.time() - time[i]
do_gc()
stopifnot(identical(z, x1))
}
printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
cat("\n")

time <- vector("numeric", length = internal_reps)
for (i in 1:internal_reps) {
Expand Down

0 comments on commit 1ae2878

Please sign in to comment.