From 83570590262ec6a86c4fbb0cc7522207004361e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 17 Jul 2024 17:51:50 +0200 Subject: [PATCH] Add "pyc-zero-mtime" handler to set embedded mtime in pyc to 0 Pyc files store the mtime, size, and optionally hash of the original py file. Mtime is used to validate the cached bytecode: the mtime of the .py file (if present) must be equal to the stored value in the .pyc file. If not, the pyc file is considered stale. On ostree systems, all mtimes are set to 0. The problem is that the .py file is created first, then the byte compilation step creates .pyc with some embedded timestamp, and later ostree discards mtimes on all files. On the end system, the bytecode cache is considered invalid. This new handler does the two very simple things: 1. zero out the mtime in the .pyc file 2. set mtime (in the filesystem) to zero for the .py file This second step is also done by ostree, so strictly speaking, it's not necessary. But it's easy to do it, and this way the bytecode still matches the file on disk, so if we called Python after this operation, it would be able to use the bytecode and wouldn't try to rewrite it. Replaces https://github.com/keszybz/add-determinism/pull/26. See https://github.com/ostreedev/ostree/issues/1469, https://gitlab.com/fedora/bootc/tracker/-/issues/3, https://pagure.io/workstation-ostree-config/pull-request/505. --- src/handlers/pyc.rs | 96 +++++++++++++++++++++- tests/test_handlers/mod.rs | 1 + tests/test_handlers/test_pyc_zero_mtime.rs | 41 +++++++++ 3 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 tests/test_handlers/test_pyc_zero_mtime.rs diff --git a/src/handlers/pyc.rs b/src/handlers/pyc.rs index 59666e5..a6d6a87 100644 --- a/src/handlers/pyc.rs +++ b/src/handlers/pyc.rs @@ -1,17 +1,20 @@ /* SPDX-License-Identifier: GPL-3.0-or-later */ -use anyhow::Result; +use anyhow::{bail, Result}; use log::debug; +use std::fs::File; +use std::io; use std::io::{Read, Write}; use std::iter; use std::path::{Path, PathBuf}; use std::rc::Rc; use std::str; +use std::time; use itertools::Itertools; use num_bigint_dig::{BigInt, ToBigInt}; -use crate::handlers::InputOutputHelper; +use crate::handlers::{InputOutputHelper, unwrap_os_string}; use crate::options; const PYC_MAGIC: &[u8] = &[0x0D, 0x0A]; @@ -739,6 +742,20 @@ impl PycParser { Ok(removed_count > 0) } + + fn set_zero_mtime(&mut self) -> Result { + // Set the embedded mtime timestamp of the source .py file to 0 in the header. + + if self.py_content_mtime() == 0 { + return Ok(false); + } + + let offset = if self.version < (3, 7) { 4 } else { 8 }; + self.data[offset..offset+4].fill(0); + assert!(self.py_content_mtime() == 0); + + Ok(true) + } } @@ -781,6 +798,81 @@ impl super::Processor for Pyc { } } + +pub struct PycZeroMtime { + config: Rc, +} + +impl PycZeroMtime { + pub fn boxed(config: &Rc) -> Box { + Box::new(Self { config: config.clone() }) + } + + fn set_zero_mtime_on_py_file(&self, input_path: &Path) -> Result<()> { + let input_file_name = unwrap_os_string(input_path.file_name().unwrap())?; + let base = input_file_name.split('.').nth(0).unwrap(); + let py_path = input_path.with_file_name(format!("{base}.py")); + debug!("Looking at {}…", py_path.display()); + + let py_file = match File::open(&py_path) { + Ok(some) => some, + Err(e) => { + if e.kind() == io::ErrorKind::NotFound { + debug!("{}: not found, ignoring", py_path.display()); + return Ok(()); + } else { + bail!("{}: cannot open: {}", py_path.display(), e); + } + } + }; + + let orig = py_file.metadata()?; + if !orig.file_type().is_file() { + debug!("{}: not a file, ignoring", py_path.display()); + } else if orig.modified()? == time::UNIX_EPOCH { + debug!("{}: mtime is already 0", py_path.display()); + } else if self.config.check { + debug!("{}: not touching mtime in --check mode", py_path.display()); + } else { + py_file.set_modified(time::UNIX_EPOCH)?; + debug!("{}: mtime set to 0", py_path.display()); + } + + Ok(()) + } +} + +impl super::Processor for PycZeroMtime { + fn name(&self) -> &str { + "pyc-zero-mtime" + } + + fn filter(&self, path: &Path) -> Result { + Ok(path.extension().is_some_and(|x| x == "pyc")) + } + + fn process(&self, input_path: &Path) -> Result { + let (mut io, input) = InputOutputHelper::open(input_path, self.config.check)?; + + let mut parser = PycParser::from_file(input_path, input)?; + let have_mod = parser.set_zero_mtime()?; + + if have_mod { + io.open_output()?; + io.output.as_mut().unwrap().write_all(&parser.data)?; + } + + let res = io.finalize(have_mod)?; + + if have_mod { + self.set_zero_mtime_on_py_file(input_path)?; + } + + Ok(res) + } +} + + #[cfg(test)] mod tests { use super::*; diff --git a/tests/test_handlers/mod.rs b/tests/test_handlers/mod.rs index c02332b..d0fdc1e 100644 --- a/tests/test_handlers/mod.rs +++ b/tests/test_handlers/mod.rs @@ -1,6 +1,7 @@ mod test_ar; mod test_javadoc; mod test_pyc; +mod test_pyc_zero_mtime; use anyhow::Result; use std::fs; diff --git a/tests/test_handlers/test_pyc_zero_mtime.rs b/tests/test_handlers/test_pyc_zero_mtime.rs new file mode 100644 index 0000000..d630d61 --- /dev/null +++ b/tests/test_handlers/test_pyc_zero_mtime.rs @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +use std::fs::File; +use std::os::linux::fs::MetadataExt; +use std::time; + +use add_determinism::handlers; +use add_determinism::handlers::pyc; + +use super::{prepare_dir, make_handler}; + +#[test] +fn test_adapters() { + let (dir, input) = prepare_dir("tests/cases/adapters.cpython-312.pyc").unwrap(); + + // We take the lazy step of creating an empty .py file here. + // This is enough to be able to adjust the mtime. + let py_file = File::options() + .read(true) + .write(true) + .create_new(true) + .open(dir.path().join("adapters.py")) + .unwrap(); + + let pyc = make_handler(111, false, pyc::PycZeroMtime::boxed).unwrap(); + + assert!(pyc.filter(&*input).unwrap()); + + let orig = input.metadata().unwrap(); + + assert_eq!(pyc.process(&*input).unwrap(), handlers::ProcessResult::Replaced); + + let new = input.metadata().unwrap(); + // because of timestamp granularity, creation ts might be equal + assert!(orig.created().unwrap() <= new.created().unwrap()); + assert_eq!(orig.modified().unwrap(), new.modified().unwrap()); + assert_ne!(orig.st_ino(), new.st_ino()); + + let new2 = py_file.metadata().unwrap(); + assert_eq!(new2.modified().unwrap(), time::UNIX_EPOCH); +}