From dbcb277aa54301b3b196f82cbfb76218140a4218 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Wed, 22 May 2024 16:26:10 +0300 Subject: [PATCH 1/7] Add tuklib_mbstr_nonprint to mask non-printable characters In multibyte locales, some control characters are multibyte too, for example, terminals interpret C1 control characters (U+0080 to U+009F) that are two bytes as UTF-8. Thus, multibyte character sets have to be handled. Instead of checking for control characters with iswcntrl(), this uses iswprint() to detect printable characters. This is much stricter. Gnulib's quotearg would do a lot more but I hope such a thing isn't needed here. Thanks to Ryan Colyer for the discussion about the problems of the earlier single-byte-only method. Thanks to Christian Weisgerber for reporting a bug in an earlier version of this code. Thanks to Jeroen Roovers for a typo fix. --- src/Makefile.am | 2 + src/common/tuklib_mbstr_nonprint.c | 139 +++++++++++++++++++++++++++++ src/common/tuklib_mbstr_nonprint.h | 76 ++++++++++++++++ 3 files changed, 217 insertions(+) create mode 100644 src/common/tuklib_mbstr_nonprint.c create mode 100644 src/common/tuklib_mbstr_nonprint.h diff --git a/src/Makefile.am b/src/Makefile.am index 10613234c..c759c14ef 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -29,6 +29,8 @@ EXTRA_DIST = \ common/tuklib_integer.h \ common/tuklib_mbstr_fw.c \ common/tuklib_mbstr.h \ + common/tuklib_mbstr_nonprint.c \ + common/tuklib_mbstr_nonprint.h \ common/tuklib_mbstr_width.c \ common/tuklib_open_stdxxx.c \ common/tuklib_open_stdxxx.h \ diff --git a/src/common/tuklib_mbstr_nonprint.c b/src/common/tuklib_mbstr_nonprint.c new file mode 100644 index 000000000..64d2e8d65 --- /dev/null +++ b/src/common/tuklib_mbstr_nonprint.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_nonprint.c +/// \brief Find and replace non-printable characters with question marks 
+// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_mbstr_nonprint.h" +#include <stdlib.h> +#include <string.h> + +#ifdef HAVE_MBRTOWC +# include <wchar.h> +# include <wctype.h> +#else +# include <ctype.h> +#endif + + +static bool +is_next_printable(const char *str, size_t len, size_t *next_len) +{ +#ifdef HAVE_MBRTOWC + // This cheats slightly and assumes that character sets with + // locking shifts aren't used. I couldn't find info about even + // ancient use of such charsets on POSIX-like systems. + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + + wchar_t wc; + *next_len = mbrtowc(&wc, str, len, &ps); + + if (*next_len == (size_t)-2) { + // Incomplete multibyte sequence: Treat the whole sequence + // as a single non-printable multibyte character that ends + // the string. + *next_len = len; + return false; + } + + // Check more broadly than just ret == (size_t)-1 to be safe + // in case mbrtowc() returns something weird. This check + // covers (size_t)-1 (that is, SIZE_MAX) too because len is from + // strlen() and the terminating '\0' isn't part of the length. + if (*next_len < 1 || *next_len > len) { + // Invalid multibyte sequence: Treat the first byte as + // a non-printable single-byte character. Decoding will + // be restarted from the next byte on the next call to + // this function. + *next_len = 1; + return false; + } + + return iswprint((wint_t)wc) != 0; +#else + (void)len; + *next_len = 1; + return isprint((unsigned char)str[0]) != 0; +#endif +} + + +static bool +has_nonprint(const char *str, size_t len) +{ + for (size_t i = 0; i < len; ) { + size_t next_len; + if (!is_next_printable(str + i, len - i, &next_len)) + return true; + + i += next_len; + } + + return false; +} + + +extern bool +tuklib_has_nonprint(const char *str) +{ + return has_nonprint(str, strlen(str)); +} + + +extern const char * +tuklib_mask_nonprint_r(const char *str, char **mem) +{ + // Free the old string, if any. 
+ free(*mem); + *mem = NULL; + + // If the whole input string contains only printable characters, + // return the input string. + const size_t len = strlen(str); + if (!has_nonprint(str, len)) + return str; + + // Allocate memory for the masked string. Since we use the single-byte + // character '?' to mask non-printable characters, it's possible that + // a few bytes less memory would be needed in reality if multibyte + // characters are masked. + // + // If allocation fails, return "???" because it should be safer than + // returning the unmasked string. + *mem = malloc(len + 1); + if (*mem == NULL) + return "???"; + + // Replace all non-printable characters with '?'. + char *dest = *mem; + + for (size_t i = 0; i < len; ) { + size_t next_len; + if (is_next_printable(str + i, len - i, &next_len)) { + memcpy(dest, str + i, next_len); + dest += next_len; + } else { + *dest++ = '?'; + } + + i += next_len; + } + + *dest = '\0'; + + return *mem; +} + + +extern const char * +tuklib_mask_nonprint(const char *str) +{ + static char *mem = NULL; + return tuklib_mask_nonprint_r(str, &mem); +} diff --git a/src/common/tuklib_mbstr_nonprint.h b/src/common/tuklib_mbstr_nonprint.h new file mode 100644 index 000000000..d50545735 --- /dev/null +++ b/src/common/tuklib_mbstr_nonprint.h @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_mbstr_nonprint.h +/// \brief Find and replace non-printable characters with question marks +/// +/// If mbrtowc(3) is available, it and iswprint(3) is used to check if all +/// characters are printable. Otherwise single-byte character set is assumed +/// and isprint(3) is used. +/// +/// It is assumed that such locales don't exist which use a character set +/// with locked shift states. This should be a safe assumption even on +/// ancient legacy systems at least if they are remotely POSIX-like. 
+/// That is, multibyte character sets like UTF-8, EUC-JP, Shift-JIS, Big5, +/// and GB18030 are compatible with this implementation. +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_MBSTR_NONPRINT_H +#define TUKLIB_MBSTR_NONPRINT_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_has_nonprint TUKLIB_SYMBOL(tuklib_has_nonprint) +extern bool tuklib_has_nonprint(const char *str); +///< +/// \brief Check if a string contains any non-printable characters +/// +/// \return false if str contains only valid multibyte characters and +/// iswprint(3) returns non-zero for all of them; true otherwise +/// +/// \note In case mbrtowc(3) isn't available, single-byte character set +/// is assumed and isprint(3) is used instead of iswprint(3). + + +#define tuklib_mask_nonprint_r TUKLIB_SYMBOL(tuklib_mask_nonprint_r) +extern const char *tuklib_mask_nonprint_r(const char *str, char **mem); +///< +/// \brief Replace non-printable characters with question marks +/// +/// \param str Untrusted string, for example, a filename +/// \param mem This function will always call free(*mem) to free +/// the old allocation. Before the first call, *mem should +/// be initialized to NULL. If this function needs to +/// allocate memory for a modified string, a pointer +/// to the allocated memory will be stored to *mem. +/// Otherwise *mem will remain NULL. +/// +/// \return If tuklib_has_nonprint(str) returns false, this function +/// returns str. Otherwise memory is allocated to hold a modified +/// string and a pointer to that is returned. The pointer to the +/// allocated memory is also stored to *mem. A modified string +/// has the problematic characters replaced by '?'. If memory +/// allocation fails, "???" is returned and *mem is NULL. 
+ +#define tuklib_mask_nonprint TUKLIB_SYMBOL(tuklib_mask_nonprint) +extern const char *tuklib_mask_nonprint(const char *str); +///< +/// \brief Replace non-printable characters with question marks +/// +/// This is a convenience function for single-threaded use. This calls +/// tuklib_mask_nonprint_r() using an internal static variable to hold +/// the possible allocation. +/// +/// \param str Untrusted string, for example, a filename +/// +/// \return See tuklib_mask_nonprint_r(). +/// +/// \note This function is not thread safe! + +TUKLIB_DECLS_END +#endif From bc51383abed30e7e2c4a6f068c72c71fa02bcfc6 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 7 May 2024 13:06:21 +0300 Subject: [PATCH 2/7] xz: Use tuklib_mbstr_nonprint Call tuklib_mask_nonprint() on filenames and also on a few other strings from the command line too. The lack of this feature has been listed in TODO since 2009: 5f6dddc6c911df02ba660564e78e6de80947c947 --- CMakeLists.txt | 2 ++ src/xz/Makefile.am | 1 + src/xz/coder.c | 19 +++++++---- src/xz/file_io.c | 81 +++++++++++++++++++++++++++++----------------- src/xz/list.c | 23 ++++++++----- src/xz/main.c | 10 ++++-- src/xz/message.c | 8 +++-- src/xz/options.c | 10 +++--- src/xz/private.h | 1 + src/xz/suffix.c | 10 +++--- 10 files changed, 106 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 534d6acea..238d4fc95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1649,6 +1649,8 @@ if(NOT MSVC OR MSVC_VERSION GREATER_EQUAL 1900) src/common/sysdefs.h src/common/tuklib_common.h src/common/tuklib_config.h + src/common/tuklib_mbstr_nonprint.c + src/common/tuklib_mbstr_nonprint.h src/common/tuklib_exit.c src/common/tuklib_exit.h src/common/tuklib_gettext.h diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am index 4ff061f30..de83ef460 100644 --- a/src/xz/Makefile.am +++ b/src/xz/Makefile.am @@ -32,6 +32,7 @@ xz_SOURCES = \ ../common/tuklib_open_stdxxx.c \ ../common/tuklib_progname.c \ ../common/tuklib_exit.c \ + 
../common/tuklib_mbstr_nonprint.c \ ../common/tuklib_mbstr_width.c \ ../common/tuklib_mbstr_fw.c diff --git a/src/xz/coder.c b/src/xz/coder.c index 5e41f0df6..c9899abbe 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -1003,8 +1003,9 @@ coder_init(file_pair *pair) strm.avail_out = 0; while ((ret = lzma_code(&strm, LZMA_RUN)) == LZMA_UNSUPPORTED_CHECK) - message_warning(_("%s: %s"), pair->src_name, - message_strm(ret)); + message_warning(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); // With --single-stream lzma_code won't wait for // LZMA_FINISH and thus it can return LZMA_STREAM_END @@ -1019,7 +1020,9 @@ coder_init(file_pair *pair) } if (ret != LZMA_OK) { - message_error(_("%s: %s"), pair->src_name, message_strm(ret)); + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); if (ret == LZMA_MEMLIMIT_ERROR) message_mem_needed(V_ERROR, lzma_memusage(&strm)); @@ -1320,11 +1323,13 @@ coder_normal(file_pair *pair) // wrong and we print an error. Otherwise it's just // a warning and coding can continue. if (stop) { - message_error(_("%s: %s"), pair->src_name, - message_strm(ret)); + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); } else { - message_warning(_("%s: %s"), pair->src_name, - message_strm(ret)); + message_warning(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); // When compressing, all possible errors set // stop to true. diff --git a/src/xz/file_io.c b/src/xz/file_io.c index 678a9a5ca..978f19b18 100644 --- a/src/xz/file_io.c +++ b/src/xz/file_io.c @@ -205,8 +205,9 @@ io_wait(file_pair *pair, int timeout, bool is_reading) continue; message_error(_("%s: poll() failed: %s"), - is_reading ? pair->src_name - : pair->dest_name, + tuklib_mask_nonprint(is_reading + ? 
pair->src_name + : pair->dest_name), strerror(errno)); return IO_WAIT_ERROR; } @@ -272,14 +273,15 @@ io_unlink(const char *name, const struct stat *known_st) // of the original file, and in that case it obviously // shouldn't be removed. message_warning(_("%s: File seems to have been moved, " - "not removing"), name); + "not removing"), tuklib_mask_nonprint(name)); else #endif // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. if (unlink(name)) message_warning(_("%s: Cannot remove: %s"), - name, strerror(errno)); + tuklib_mask_nonprint(name), + strerror(errno)); return; } @@ -305,7 +307,8 @@ io_copy_attrs(const file_pair *pair) if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) && warn_fchown) message_warning(_("%s: Cannot set the file owner: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); mode_t mode; @@ -318,7 +321,8 @@ io_copy_attrs(const file_pair *pair) && fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) { message_warning(_("%s: Cannot set the file group: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); // We can still safely copy some additional permissions: // 'group' must be at least as strict as 'other' and // also vice versa. @@ -337,7 +341,8 @@ io_copy_attrs(const file_pair *pair) if (fchmod(pair->dest_fd, mode)) message_warning(_("%s: Cannot set the file permissions: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); #endif // Copy the timestamps. 
We have several possible ways to do this, of @@ -515,13 +520,15 @@ io_open_src_real(file_pair *pair) if (!follow_symlinks) { struct stat st; if (lstat(pair->src_name, &st)) { - message_error(_("%s: %s"), pair->src_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), strerror(errno)); return true; } else if (S_ISLNK(st.st_mode)) { message_warning(_("%s: Is a symbolic link, " - "skipping"), pair->src_name); + "skipping"), + tuklib_mask_nonprint(pair->src_name)); return true; } } @@ -583,13 +590,15 @@ io_open_src_real(file_pair *pair) if (was_symlink) message_warning(_("%s: Is a symbolic link, " - "skipping"), pair->src_name); + "skipping"), + tuklib_mask_nonprint(pair->src_name)); else #endif // Something else than O_NOFOLLOW failing // (assuming that the race conditions didn't // confuse us). - message_error(_("%s: %s"), pair->src_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), strerror(errno)); return true; @@ -612,13 +621,13 @@ io_open_src_real(file_pair *pair) if (S_ISDIR(pair->src_st.st_mode)) { message_warning(_("%s: Is a directory, skipping"), - pair->src_name); + tuklib_mask_nonprint(pair->src_name)); goto error; } if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { message_warning(_("%s: Not a regular file, skipping"), - pair->src_name); + tuklib_mask_nonprint(pair->src_name)); goto error; } @@ -636,21 +645,21 @@ io_open_src_real(file_pair *pair) // explicitly in io_copy_attr(). 
message_warning(_("%s: File has setuid or " "setgid bit set, skipping"), - pair->src_name); + tuklib_mask_nonprint(pair->src_name)); goto error; } if (pair->src_st.st_mode & S_ISVTX) { message_warning(_("%s: File has sticky bit " "set, skipping"), - pair->src_name); + tuklib_mask_nonprint(pair->src_name)); goto error; } if (pair->src_st.st_nlink > 1) { message_warning(_("%s: Input file has more " - "than one hard link, " - "skipping"), pair->src_name); + "than one hard link, skipping"), + tuklib_mask_nonprint(pair->src_name)); goto error; } } @@ -679,7 +688,8 @@ io_open_src_real(file_pair *pair) return false; error_msg: - message_error(_("%s: %s"), pair->src_name, strerror(errno)); + message_error(_("%s: %s"), tuklib_mask_nonprint(pair->src_name), + strerror(errno)); error: (void)close(pair->src_fd); return true; @@ -816,7 +826,8 @@ io_open_dest_real(file_pair *pair) if (st.st_dev == -1) { message_error("%s: Refusing to write to " "a DOS special file", - pair->dest_name); + tuklib_mask_nonprint( + pair->dest_name)); free(pair->dest_name); return true; } @@ -826,7 +837,8 @@ io_open_dest_real(file_pair *pair) && st.st_ino == pair->src_st.st_ino) { message_error("%s: Output file is the same " "as the input file", - pair->dest_name); + tuklib_mask_nonprint( + pair->dest_name)); free(pair->dest_name); return true; } @@ -836,7 +848,8 @@ io_open_dest_real(file_pair *pair) // If --force was used, unlink the target file first. 
if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { message_error(_("%s: Cannot remove: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); free(pair->dest_name); return true; } @@ -851,7 +864,8 @@ io_open_dest_real(file_pair *pair) pair->dest_fd = open(pair->dest_name, flags, mode); if (pair->dest_fd == -1) { - message_error(_("%s: %s"), pair->dest_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->dest_name), strerror(errno)); free(pair->dest_name); return true; @@ -882,7 +896,7 @@ io_open_dest_real(file_pair *pair) else if (pair->dest_fd != STDOUT_FILENO && !S_ISREG(pair->dest_st.st_mode)) { message_error("%s: Destination is not a regular file", - pair->dest_name); + tuklib_mask_nonprint(pair->dest_name)); // dest_fd needs to be reset to -1 to keep io_close() working. (void)close(pair->dest_fd); @@ -1005,7 +1019,8 @@ io_close_dest(file_pair *pair, bool success) if (close(pair->dest_fd)) { message_error(_("%s: Closing the file failed: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. 
Get rid of junk: @@ -1042,7 +1057,8 @@ io_close(file_pair *pair, bool success) SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when trying " "to create a sparse file: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); success = false; } else { const uint8_t zero[1] = { '\0' }; @@ -1141,7 +1157,8 @@ io_read(file_pair *pair, io_buf *buf, size_t size) #endif message_error(_("%s: Read error: %s"), - pair->src_name, strerror(errno)); + tuklib_mask_nonprint(pair->src_name), + strerror(errno)); return SIZE_MAX; } @@ -1171,7 +1188,8 @@ io_seek_src(file_pair *pair, uint64_t pos) if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) { message_error(_("%s: Error seeking the file: %s"), - pair->src_name, strerror(errno)); + tuklib_mask_nonprint(pair->src_name), + strerror(errno)); return true; } @@ -1195,7 +1213,7 @@ io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos) if (amount != size) { message_error(_("%s: Unexpected end of file"), - pair->src_name); + tuklib_mask_nonprint(pair->src_name)); return true; } @@ -1254,7 +1272,8 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) // user_abort, and get EPIPE here. 
if (errno != EPIPE) message_error(_("%s: Write error: %s"), - pair->dest_name, strerror(errno)); + tuklib_mask_nonprint(pair->dest_name), + strerror(errno)); return true; } @@ -1304,7 +1323,9 @@ io_write(file_pair *pair, const io_buf *buf, size_t size) SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when " "trying to create a sparse " - "file: %s"), pair->dest_name, + "file: %s"), + tuklib_mask_nonprint( + pair->dest_name), strerror(errno)); return true; } diff --git a/src/xz/list.c b/src/xz/list.c index ca9cf03e8..9b445ac1e 100644 --- a/src/xz/list.c +++ b/src/xz/list.c @@ -347,13 +347,14 @@ static bool parse_indexes(xz_file_info *xfi, file_pair *pair) { if (pair->src_st.st_size <= 0) { - message_error(_("%s: File is empty"), pair->src_name); + message_error(_("%s: File is empty"), + tuklib_mask_nonprint(pair->src_name)); return true; } if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { message_error(_("%s: Too small to be a valid .xz file"), - pair->src_name); + tuklib_mask_nonprint(pair->src_name)); return true; } @@ -365,7 +366,9 @@ parse_indexes(xz_file_info *xfi, file_pair *pair) hardware_memlimit_get(MODE_LIST), (uint64_t)(pair->src_st.st_size)); if (ret != LZMA_OK) { - message_error(_("%s: %s"), pair->src_name, message_strm(ret)); + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), + message_strm(ret)); return true; } @@ -411,7 +414,8 @@ parse_indexes(xz_file_info *xfi, file_pair *pair) } default: - message_error(_("%s: %s"), pair->src_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), message_strm(ret)); // If the error was too low memory usage limit, @@ -473,7 +477,8 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter, break; case LZMA_OPTIONS_ERROR: - message_error(_("%s: %s"), pair->src_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), message_strm(LZMA_OPTIONS_ERROR)); return true; @@ -587,7 +592,8 @@ parse_block_header(file_pair *pair, const lzma_index_iter 
*iter, // Check if the stringification succeeded. if (str_ret != LZMA_OK) { - message_error(_("%s: %s"), pair->src_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), message_strm(str_ret)); return true; } @@ -596,7 +602,8 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter, data_error: // Show the error message. - message_error(_("%s: %s"), pair->src_name, + message_error(_("%s: %s"), + tuklib_mask_nonprint(pair->src_name), message_strm(LZMA_DATA_ERROR)); return true; } @@ -1048,7 +1055,7 @@ print_info_robot(xz_file_info *xfi, file_pair *pair) char checks[CHECKS_STR_SIZE]; get_check_names(checks, lzma_index_checks(xfi->idx), false); - printf("name\t%s\n", pair->src_name); + printf("name\t%s\n", tuklib_mask_nonprint(pair->src_name)); printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\n", diff --git a/src/xz/main.c b/src/xz/main.c index 71b5ef7b7..1b8b37881 100644 --- a/src/xz/main.c +++ b/src/xz/main.c @@ -87,7 +87,8 @@ read_name(const args_info *args) continue; message_error(_("%s: Error reading filenames: %s"), - args->files_name, strerror(errno)); + tuklib_mask_nonprint(args->files_name), + strerror(errno)); return NULL; } @@ -95,7 +96,8 @@ read_name(const args_info *args) if (pos != 0) message_error(_("%s: Unexpected end of input " "when reading filenames"), - args->files_name); + tuklib_mask_nonprint( + args->files_name)); return NULL; } @@ -120,7 +122,9 @@ read_name(const args_info *args) message_error(_("%s: Null character found when " "reading filenames; maybe you meant " "to use '--files0' instead " - "of '--files'?"), args->files_name); + "of '--files'?"), + tuklib_mask_nonprint( + args->files_name)); return NULL; } diff --git a/src/xz/message.c b/src/xz/message.c index deafdb438..415bd4adc 100644 --- a/src/xz/message.c +++ b/src/xz/message.c @@ -196,10 +196,12 @@ print_filename(void) // If we don't know how many files there will be due // to usage of --files or --files0. 
if (files_total == 0) - fprintf(file, "%s (%u)\n", filename, + fprintf(file, "%s (%u)\n", + tuklib_mask_nonprint(filename), files_pos); else - fprintf(file, "%s (%u/%u)\n", filename, + fprintf(file, "%s (%u/%u)\n", + tuklib_mask_nonprint(filename), files_pos, files_total); signals_unblock(); @@ -648,7 +650,7 @@ progress_flush(bool finished) cols[4]); } else { // The filename is always printed. - fprintf(stderr, _("%s: "), filename); + fprintf(stderr, _("%s: "), tuklib_mask_nonprint(filename)); // Percentage is printed only if we didn't finish yet. if (!finished) { diff --git a/src/xz/options.c b/src/xz/options.c index bc8bc1a6c..af0b28c59 100644 --- a/src/xz/options.c +++ b/src/xz/options.c @@ -83,14 +83,15 @@ parse_options(const char *str, const option_map *opts, if (value == NULL || value[0] == '\0') message_fatal(_("%s: Options must be 'name=value' " - "pairs separated with commas"), str); + "pairs separated with commas"), + tuklib_mask_nonprint(str)); // Look for the option name from the option map. 
unsigned i = 0; while (true) { if (opts[i].name == NULL) message_fatal(_("%s: Invalid option name"), - name); + tuklib_mask_nonprint(name)); if (strcmp(name, opts[i].name) == 0) break; @@ -110,7 +111,7 @@ parse_options(const char *str, const option_map *opts, if (opts[i].map[j].name == NULL) message_fatal(_("%s: Invalid option value"), - value); + tuklib_mask_nonprint(value)); set(filter_options, i, opts[i].map[j].id, value); @@ -244,7 +245,8 @@ tuklib_attr_noreturn static void error_lzma_preset(const char *valuestr) { - message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), valuestr); + message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), + tuklib_mask_nonprint(valuestr)); } diff --git a/src/xz/private.h b/src/xz/private.h index b370472e3..d351a995e 100644 --- a/src/xz/private.h +++ b/src/xz/private.h @@ -28,6 +28,7 @@ #include "tuklib_gettext.h" #include "tuklib_progname.h" #include "tuklib_exit.h" +#include "tuklib_mbstr_nonprint.h" #include "tuklib_mbstr.h" #if defined(_WIN32) && !defined(__CYGWIN__) diff --git a/src/xz/suffix.c b/src/xz/suffix.c index 1d548e485..a80f45656 100644 --- a/src/xz/suffix.c +++ b/src/xz/suffix.c @@ -163,7 +163,7 @@ uncompressed_name(const char *src_name, const size_t src_len) if (new_len == 0) { message_warning(_("%s: Filename has an unknown suffix, " - "skipping"), src_name); + "skipping"), tuklib_mask_nonprint(src_name)); return NULL; } @@ -184,7 +184,7 @@ static void msg_suffix(const char *src_name, const char *suffix) { message_warning(_("%s: File already has '%s' suffix, skipping"), - src_name, suffix); + tuklib_mask_nonprint(src_name), suffix); return; } @@ -389,8 +389,10 @@ suffix_set(const char *suffix) { // Empty suffix and suffixes having a directory separator are // rejected. Such suffixes would break things later. 
- if (suffix[0] == '\0' || has_dir_sep(suffix)) - message_fatal(_("%s: Invalid filename suffix"), suffix); + if (suffix[0] == '\0' || tuklib_has_nonprint(suffix) + || has_dir_sep(suffix)) + message_fatal(_("%s: Invalid filename suffix"), + tuklib_mask_nonprint(suffix)); // Replace the old custom_suffix (if any) with the new suffix. free(custom_suffix); From 479b568d2bb6e25115fbb3b503855a79d0b27eb0 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 7 May 2024 13:06:21 +0300 Subject: [PATCH 3/7] xzdec: Use setlocale() This prepares for tuklib_mask_nonprint() from tuklib_mbstr_nonprint.c. It has locale-specific behavior (LC_CTYPE). --- src/xzdec/xzdec.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/xzdec/xzdec.c b/src/xzdec/xzdec.c index 4d881748b..0b3e4dec1 100644 --- a/src/xzdec/xzdec.c +++ b/src/xzdec/xzdec.c @@ -15,6 +15,7 @@ #include <stdarg.h> #include <errno.h> #include <stdio.h> +#include <locale.h> #ifndef _MSC_VER # include <unistd.h> @@ -416,6 +417,10 @@ main(int argc, char **argv) (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); #endif + // Set the locale because tuklib_mask_nonprint() has locale-specific + // behavior. + setlocale(LC_ALL, ""); + // Initialize progname which we will be used in error messages. 
tuklib_progname_init(argv); From a5e9c3f3b7752b1e06ce855481c8fba0c431f92a Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 7 May 2024 13:06:21 +0300 Subject: [PATCH 4/7] xzdec: Use tuklib_mbstr_nonprint --- CMakeLists.txt | 3 +++ src/xzdec/Makefile.am | 2 ++ src/xzdec/xzdec.c | 13 +++++++++---- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 238d4fc95..4a984d716 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1536,6 +1536,8 @@ if(HAVE_DECODERS AND (NOT MSVC OR MSVC_VERSION GREATER_EQUAL 1900)) src/common/sysdefs.h src/common/tuklib_common.h src/common/tuklib_config.h + src/common/tuklib_mbstr_nonprint.c + src/common/tuklib_mbstr_nonprint.h src/common/tuklib_exit.c src/common/tuklib_exit.h src/common/tuklib_gettext.h @@ -1565,6 +1567,7 @@ if(HAVE_DECODERS AND (NOT MSVC OR MSVC_VERSION GREATER_EQUAL 1900)) endif() tuklib_progname("${XZDEC}") + tuklib_mbstr("${XZDEC}") install(TARGETS "${XZDEC}" RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" diff --git a/src/xzdec/Makefile.am b/src/xzdec/Makefile.am index 31a6508cc..f300cc45e 100644 --- a/src/xzdec/Makefile.am +++ b/src/xzdec/Makefile.am @@ -11,6 +11,7 @@ xzdec_SOURCES = \ xzdec.c \ ../common/tuklib_progname.c \ + ../common/tuklib_mbstr_nonprint.c \ ../common/tuklib_exit.c if COND_W32 @@ -34,6 +35,7 @@ xzdec_LDADD += $(LTLIBINTL) lzmadec_SOURCES = \ xzdec.c \ ../common/tuklib_progname.c \ + ../common/tuklib_mbstr_nonprint.c \ ../common/tuklib_exit.c if COND_W32 diff --git a/src/xzdec/xzdec.c b/src/xzdec/xzdec.c index 0b3e4dec1..e4734c1fc 100644 --- a/src/xzdec/xzdec.c +++ b/src/xzdec/xzdec.c @@ -43,6 +43,7 @@ #include "getopt.h" #include "tuklib_progname.h" +#include "tuklib_mbstr_nonprint.h" #include "tuklib_exit.h" #ifdef TUKLIB_DOSLIKE @@ -210,7 +211,8 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename) // an error occurred. ferror() doesn't // touch errno. 
my_errorf("%s: Error reading input file: %s", - filename, strerror(errno)); + tuklib_mask_nonprint(filename), + strerror(errno)); exit(EXIT_FAILURE); } @@ -293,7 +295,8 @@ uncompress(lzma_stream *strm, FILE *file, const char *filename) break; } - my_errorf("%s: %s", filename, msg); + my_errorf("%s: %s", tuklib_mask_nonprint(filename), + msg); exit(EXIT_FAILURE); } } @@ -458,8 +461,10 @@ main(int argc, char **argv) src_name = argv[optind]; src_file = fopen(src_name, "rb"); if (src_file == NULL) { - my_errorf("%s: %s", src_name, - strerror(errno)); + my_errorf("%s: %s", + tuklib_mask_nonprint( + src_name), + strerror(errno)); exit(EXIT_FAILURE); } } From 7b516124af338fb348e7399223c24d6e26b6d455 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 7 May 2024 13:06:21 +0300 Subject: [PATCH 5/7] lzmainfo: Use tuklib_mbstr_nonprint --- CMakeLists.txt | 3 +++ src/lzmainfo/Makefile.am | 1 + src/lzmainfo/lzmainfo.c | 16 ++++++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a984d716..5cf043529 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1595,6 +1595,8 @@ if(HAVE_DECODERS AND (NOT MSVC OR MSVC_VERSION GREATER_EQUAL 1900)) src/common/sysdefs.h src/common/tuklib_common.h src/common/tuklib_config.h + src/common/tuklib_mbstr_nonprint.c + src/common/tuklib_mbstr_nonprint.h src/common/tuklib_exit.c src/common/tuklib_exit.h src/common/tuklib_gettext.h @@ -1619,6 +1621,7 @@ if(HAVE_DECODERS AND (NOT MSVC OR MSVC_VERSION GREATER_EQUAL 1900)) endif() tuklib_progname(lzmainfo) + tuklib_mbstr(lzmainfo) # NOTE: The translations are in the "xz" domain and the .mo files are # installed as part of the "xz" target. 
diff --git a/src/lzmainfo/Makefile.am b/src/lzmainfo/Makefile.am index 9a5bc68b3..3b0d22391 100644 --- a/src/lzmainfo/Makefile.am +++ b/src/lzmainfo/Makefile.am @@ -6,6 +6,7 @@ bin_PROGRAMS = lzmainfo lzmainfo_SOURCES = \ lzmainfo.c \ ../common/tuklib_progname.c \ + ../common/tuklib_mbstr_nonprint.c \ ../common/tuklib_exit.c if COND_W32 diff --git a/src/lzmainfo/lzmainfo.c b/src/lzmainfo/lzmainfo.c index 2550b1f11..295aa2794 100644 --- a/src/lzmainfo/lzmainfo.c +++ b/src/lzmainfo/lzmainfo.c @@ -17,6 +17,7 @@ #include "getopt.h" #include "tuklib_gettext.h" #include "tuklib_progname.h" +#include "tuklib_mbstr_nonprint.h" #include "tuklib_exit.h" #ifdef TUKLIB_DOSLIKE @@ -104,7 +105,8 @@ lzmainfo(const char *name, FILE *f) uint8_t buf[13]; const size_t size = fread(buf, 1, sizeof(buf), f); if (size != 13) { - fprintf(stderr, "%s: %s: %s\n", progname, name, + fprintf(stderr, "%s: %s: %s\n", progname, + tuklib_mask_nonprint(name), ferror(f) ? strerror(errno) : _("File is too small to be a .lzma file")); return true; @@ -118,7 +120,8 @@ lzmainfo(const char *name, FILE *f) break; case LZMA_OPTIONS_ERROR: - fprintf(stderr, "%s: %s: %s\n", progname, name, + fprintf(stderr, "%s: %s: %s\n", progname, + tuklib_mask_nonprint(name), _("Not a .lzma file")); return true; @@ -142,7 +145,7 @@ lzmainfo(const char *name, FILE *f) // this output and we don't want to break that when people move // from LZMA Utils to XZ Utils. 
if (f != stdin) - printf("%s\n", name); + printf("%s\n", tuklib_mask_nonprint(name)); printf("Uncompressed size: "); if (uncompressed_size == UINT64_MAX) @@ -201,9 +204,10 @@ main(int argc, char **argv) if (f == NULL) { ret = EXIT_FAILURE; fprintf(stderr, "%s: %s: %s\n", - progname, - argv[optind], - strerror(errno)); + progname, + tuklib_mask_nonprint( + argv[optind]), + strerror(errno)); continue; } From 02b0df81de201f7ce1b6c3a3d82b6fa2d6eb544a Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 7 May 2024 13:06:21 +0300 Subject: [PATCH 6/7] Update TODO --- TODO | 3 --- 1 file changed, 3 deletions(-) diff --git a/TODO b/TODO index ad37f3f55..20a702fdc 100644 --- a/TODO +++ b/TODO @@ -19,9 +19,6 @@ Known bugs compress extremely well, so going from compression ratio of 0.003 to 0.004 means big relative increase in the compressed file size. - xz doesn't quote unprintable characters when it displays file names - given on the command line. - tuklib_exit() doesn't block signals => EINTR is possible. If liblzma has created threads and fork() gets called, liblzma From 3ad4a2505fcb0f38a76f716a1da582397ed227b1 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Tue, 7 May 2024 13:22:03 +0300 Subject: [PATCH 7/7] Update THANKS --- THANKS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/THANKS b/THANKS index 7d2d4fe82..8930ac668 100644 --- a/THANKS +++ b/THANKS @@ -33,6 +33,7 @@ has been important. :-) In alphabetical order: - Tomer Chachamu - Vitaly Chikunov - Antoine Cœur + - Ryan Colyer - Gabi Davar - İhsan Doğan - Chris Donawa @@ -132,6 +133,7 @@ has been important. :-) In alphabetical order: - Bernhard Reutner-Fischer - Markus Rickert - Cristian Rodríguez + - Jeroen Roovers - Christian von Roques - Boud Roukema - Torsten Rupp