diff --git a/DESCRIPTION b/DESCRIPTION index 1918377..d2e4b01 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: qs2 Type: Package Title: A Package for Efficient Serialization Version: 0.1.0 -Date: 2024-09-07 +Date: 2024-09-09 Authors@R: c( person("Travers", "Ching", email = "traversc@gmail.com", role = c("aut", "cre", "cph")), person("Yann", "Collet", role = c("ctb", "cph"), comment = "Yann Collet is the author of the bundled zstd"), diff --git a/R/documentation.R b/R/documentation.R index e134413..f72015c 100644 --- a/R/documentation.R +++ b/R/documentation.R @@ -7,8 +7,7 @@ shared_params_save <- function(warn_unsupported_types=FALSE) { 'The maximum and minimum possible values depends on the version of ZSTD library used.', 'As of ZSTD 1.5.6 the maximum compression level is 22, and the minimum is -131072. Usually, values in the low positive range offer very good performance in terms', 'of speed and compression.', - '@param shuffle Whether to allow byte shuffling when compressing data (default: `TRUE`). See section *Byte shuffling* in the vignette or readme for details.', - # '@param store_checksum Whether to store the checksum in the saved object (Default `TRUE`) which is used to verify file integrity during read. There is a performance penalty when enabled and a checksum is not at all necessary so you may want to disable it.', + '@param shuffle Whether to allow byte shuffling when compressing data (default: `TRUE`).', '@param warn_unsupported_types Whether to warn when saving an object with an unsupported type (default `TRUE`).'[warn_unsupported_types], '@param nthreads The number of threads to use when compressing data (default: `1`).') } diff --git a/man/qd_save.Rd b/man/qd_save.Rd index 45f1b65..439d488 100644 --- a/man/qd_save.Rd +++ b/man/qd_save.Rd @@ -19,7 +19,7 @@ The maximum and minimum possible values depends on the version of ZSTD library u As of ZSTD 1.5.6 the maximum compression level is 22, and the minimum is -131072. Usually, values in the low positive range offer very good performance in terms of speed and compression.} -\item{shuffle}{Whether to allow byte shuffling when compressing data (default: \code{TRUE}). See section \emph{Byte shuffling} in the vignette or readme for details.} +\item{shuffle}{Whether to allow byte shuffling when compressing data (default: \code{TRUE}).} \item{warn_unsupported_types}{Whether to warn when saving an object with an unsupported type (default \code{TRUE}).} diff --git a/man/qs_save.Rd b/man/qs_save.Rd index d368f5c..c9b9e37 100644 --- a/man/qs_save.Rd +++ b/man/qs_save.Rd @@ -18,7 +18,7 @@ The maximum and minimum possible values depends on the version of ZSTD library u As of ZSTD 1.5.6 the maximum compression level is 22, and the minimum is -131072. Usually, values in the low positive range offer very good performance in terms of speed and compression.} -\item{shuffle}{Whether to allow byte shuffling when compressing data (default: \code{TRUE}). See section \emph{Byte shuffling} in the vignette or readme for details.} +\item{shuffle}{Whether to allow byte shuffling when compressing data (default: \code{TRUE}).} \item{nthreads}{The number of threads to use when compressing data (default: \code{1}).} } diff --git a/src/qd_deserializer.h b/src/qd_deserializer.h index a6d6f06..38734bd 100644 --- a/src/qd_deserializer.h +++ b/src/qd_deserializer.h @@ -174,15 +174,6 @@ struct QdataDeserializer { } } public: - // template void shuffle_get_data(T * buf, const uint64_t len, const uint64_t bytesoftype = sizeof(T)) { - // if( shuffle && (len > MIN_SHUFFLE_ARRAYSIZE) ) { - // if(len > shuffleblock.size()) shuffleblock.resize(len); - // reader.get_data( reinterpret_cast(shuffleblock.data()), len ); - // blosc_unshuffle(reinterpret_cast(shuffleblock.data()), reinterpret_cast(buf), len, bytesoftype); - // } else { - // reader.get_data( reinterpret_cast(buf), len ); - // } - // } // len, attr_length should be pre-initialized to 0 void read_header(qstype & type, uint64_t & object_length, uint32_t & attr_length) { @@ -216,17 +207,6 @@ struct QdataDeserializer { } } - // inline void read_string_UF_header(uint32_t & string_len) { - // uint8_t header_byte = reader.template get_pod(); - // if(header_byte < string_UF_max_8) { - // string_len = header_byte; - // } else if(header_byte == string_UF_header_NA) { - // string_len = NA_STRING_LENGTH; - // } else { - // string_len = reader.template get_pod_contiguous(); - // } - // } - void read_and_assign_attributes(SEXP object, const uint32_t attr_length) { SEXP aptr = Rf_allocList(attr_length); SET_ATTRIB(object, aptr); // assign immediately for protection diff --git a/tests/correctness_testing.R b/tests/correctness_testing.R index 69c6be1..9b81250 100644 --- a/tests/correctness_testing.R +++ b/tests/correctness_testing.R @@ -74,13 +74,7 @@ rand_strings <- function(N) { # do not include functions as they do not evaluate to TRUE with identical(x, y) random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size - if (sample(3, 1) == 1) { - ret <- as.list(1:N) - } else if (sample(2, 1) == 1) { - ret <- as.pairlist(1:N) - } else { - ret <- as.pairlist(1:N) - } + ret <- as.list(1:N) for (i in 1:N) { if (get_obj_size() > get("max_size", envir = globalenv())) break; otype <- sample(12, size = 1) @@ -92,7 +86,7 @@ random_object_generator <- function(N, with_envs = FALSE) { # additional input: else if (otype == 4) { z <- (sample(256, size = 1e4, replace = TRUE) - 1) %>% as.raw; set_obj_size(z); } else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } - else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } else { z <- random_object_generator(N, with_envs) } if (is_attribute) { @@ -370,22 +364,24 @@ for (q in 1:reps) { } cat("\n") - if(format == "qs2") { - time <- vector("numeric", length = 8) - for (i in 1:8) { - obj_size <- 0 - x1 <- random_object_generator(12) - printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) - time[i] <- Sys.time() - qs_save_rand(x1) - z <- qs_read_rand() - time[i] <- Sys.time() - time[i] - do_gc() - stopifnot(identical(z, x1)) + time <- vector("numeric", length = 8) + for (i in 1:8) { + obj_size <- 0 + if(format == "qs2") { + x1 <- random_object_generator(12, with_envs = TRUE) + } else { # qdata + x1 <- random_object_generator(12, with_envs = FALSE) } - printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) - cat("\n") + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qs_save_rand(x1) + z <- qs_read_rand() + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(identical(z, x1)) } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") time <- vector("numeric", length = internal_reps) for (i in 1:internal_reps) {