Skip to content

Commit

Permalink
Add "pyc-zero-mtime" handler to set embedded mtime in pyc to 0
Browse files Browse the repository at this point in the history
Pyc files store the mtime, size, and optionally hash of the original
py file. Mtime is used to validate the cached bytecode: the mtime
of the .py file (if present) must be equal to the stored value in
the .pyc file. If not, the pyc file is considered stale.

On ostree systems, all mtimes are set to 0. The problem is that the
.py file is created first, then the byte compilation step creates
.pyc with some embedded timestamp, and later ostree discards mtimes
on all files. On the end system, the bytecode cache is considered
invalid.

This new handler does the two very simple things:
1. zero out the mtime in the .pyc file
2. set mtime (in the filesystem) to zero for the .py file

This second step is also done by ostree, so strictly speaking, it's
not necessary. But it's easy to do it, and this way the bytecode
still matches the file on disk, so if we called Python after this
operation, it would be able to use the bytecode and wouldn't try to
rewrite it.

Replaces #26.
See ostreedev/ostree#1469,
https://gitlab.com/fedora/bootc/tracker/-/issues/3,
https://pagure.io/workstation-ostree-config/pull-request/505.
  • Loading branch information
keszybz committed Jul 17, 2024
1 parent adf1d61 commit 8357059
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 2 deletions.
96 changes: 94 additions & 2 deletions src/handlers/pyc.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */

use anyhow::Result;
use anyhow::{bail, Result};
use log::debug;
use std::fs::File;
use std::io;
use std::io::{Read, Write};
use std::iter;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::str;
use std::time;

use itertools::Itertools;
use num_bigint_dig::{BigInt, ToBigInt};

use crate::handlers::InputOutputHelper;
use crate::handlers::{InputOutputHelper, unwrap_os_string};
use crate::options;

const PYC_MAGIC: &[u8] = &[0x0D, 0x0A];
Expand Down Expand Up @@ -739,6 +742,20 @@ impl PycParser {

Ok(removed_count > 0)
}

fn set_zero_mtime(&mut self) -> Result<bool> {
// Set the embedded mtime timestamp of the source .py file to 0 in the header.

if self.py_content_mtime() == 0 {
return Ok(false);
}

let offset = if self.version < (3, 7) { 4 } else { 8 };
self.data[offset..offset+4].fill(0);
assert!(self.py_content_mtime() == 0);

Ok(true)
}
}


Expand Down Expand Up @@ -781,6 +798,81 @@ impl super::Processor for Pyc {
}
}


pub struct PycZeroMtime {
config: Rc<options::Config>,
}

impl PycZeroMtime {
pub fn boxed(config: &Rc<options::Config>) -> Box<dyn super::Processor> {
Box::new(Self { config: config.clone() })
}

fn set_zero_mtime_on_py_file(&self, input_path: &Path) -> Result<()> {
let input_file_name = unwrap_os_string(input_path.file_name().unwrap())?;
let base = input_file_name.split('.').nth(0).unwrap();
let py_path = input_path.with_file_name(format!("{base}.py"));
debug!("Looking at {}…", py_path.display());

let py_file = match File::open(&py_path) {
Ok(some) => some,
Err(e) => {
if e.kind() == io::ErrorKind::NotFound {
debug!("{}: not found, ignoring", py_path.display());
return Ok(());
} else {
bail!("{}: cannot open: {}", py_path.display(), e);
}
}
};

let orig = py_file.metadata()?;
if !orig.file_type().is_file() {
debug!("{}: not a file, ignoring", py_path.display());
} else if orig.modified()? == time::UNIX_EPOCH {
debug!("{}: mtime is already 0", py_path.display());
} else if self.config.check {
debug!("{}: not touching mtime in --check mode", py_path.display());
} else {
py_file.set_modified(time::UNIX_EPOCH)?;
debug!("{}: mtime set to 0", py_path.display());
}

Ok(())
}
}

impl super::Processor for PycZeroMtime {
fn name(&self) -> &str {
"pyc-zero-mtime"
}

fn filter(&self, path: &Path) -> Result<bool> {
Ok(path.extension().is_some_and(|x| x == "pyc"))
}

fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
let (mut io, input) = InputOutputHelper::open(input_path, self.config.check)?;

let mut parser = PycParser::from_file(input_path, input)?;
let have_mod = parser.set_zero_mtime()?;

if have_mod {
io.open_output()?;
io.output.as_mut().unwrap().write_all(&parser.data)?;
}

let res = io.finalize(have_mod)?;

if have_mod {
self.set_zero_mtime_on_py_file(input_path)?;
}

Ok(res)
}
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down
1 change: 1 addition & 0 deletions tests/test_handlers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod test_ar;
mod test_javadoc;
mod test_pyc;
mod test_pyc_zero_mtime;

use anyhow::Result;
use std::fs;
Expand Down
41 changes: 41 additions & 0 deletions tests/test_handlers/test_pyc_zero_mtime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */

use std::fs::File;
use std::os::linux::fs::MetadataExt;
use std::time;

use add_determinism::handlers;
use add_determinism::handlers::pyc;

use super::{prepare_dir, make_handler};

#[test]
fn test_adapters() {
let (dir, input) = prepare_dir("tests/cases/adapters.cpython-312.pyc").unwrap();

// We take the lazy step of creating an empty .py file here.
// This is enough to be able to adjust the mtime.
let py_file = File::options()
.read(true)
.write(true)
.create_new(true)
.open(dir.path().join("adapters.py"))
.unwrap();

let pyc = make_handler(111, false, pyc::PycZeroMtime::boxed).unwrap();

assert!(pyc.filter(&*input).unwrap());

let orig = input.metadata().unwrap();

assert_eq!(pyc.process(&*input).unwrap(), handlers::ProcessResult::Replaced);

let new = input.metadata().unwrap();
// because of timestamp granularity, creation ts might be equal
assert!(orig.created().unwrap() <= new.created().unwrap());
assert_eq!(orig.modified().unwrap(), new.modified().unwrap());
assert_ne!(orig.st_ino(), new.st_ino());

let new2 = py_file.metadata().unwrap();
assert_eq!(new2.modified().unwrap(), time::UNIX_EPOCH);
}

0 comments on commit 8357059

Please sign in to comment.