Skip to content

Commit

Permalink
Make --transform accept commands that modify files in-place
Browse files Browse the repository at this point in the history
Fixes #15
  • Loading branch information
pkolaczk committed Jul 10, 2020
1 parent 4c8eec1 commit 489b51a
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 33 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ Exclude a part of the directory tree from the scan:

fclones / -R --exclude '/dev/**' '/proc/**'

### Preprocessing files
Use the `--transform` option to safely transform files with an external command.
By default, the transformation happens on a copy of a file, to avoid accidental data loss.

Strip exif before matching duplicate jpg images:

fclones . -R --names '*.jpg' --caseless --transform 'exiv2 -d a $IN' --in-place

### Other

List more options:
Expand Down
50 changes: 39 additions & 11 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,40 @@ pub struct Config {
#[structopt(short = "H", long)]
pub hard_links: bool,

/// Before matching, transform each file by the specified program.
/// Before matching, transforms each file by the specified program.
/// The value of this parameter should contain a command: the path to the program
/// and optionally its space-separated arguments.
/// and optionally a list of space-separated arguments.
/// By default, the file to process will be piped to the standard input of the program and the
/// processed data will be read from the standard output.
/// If the program does not support piping, but requires its input and/or output file path
/// If the program does not support piping, but requires its input and/or output file path(s)
/// to be specified in the argument list, denote these paths by $IN and $OUT special variables.
/// If $IN is specified in the command string, the file will not be piped to the standard input.
/// If $OUT is specified in the command string, the result will not be read from
/// the standard output, but fclones will set up a named pipe $OUT and read from
/// that pipe instead.
/// If $IN is specified in the command string, the file will not be piped to the standard input,
/// but copied first to a temporary location and that temporary location will be substituted
/// as the value of $IN when launching the transform command.
/// Similarly, if $OUT is specified in the command string, the result will not be read from
/// the standard output, but fclones will expect the program to write to a named pipe
/// specified by $OUT and will read output from there.
/// If the program modifies the original file in-place without writing to the standard output
/// or to a distinct file, use the --in-place flag.
#[structopt(long, value_name("command"))]
pub transform: Option<String>,

/// Set this flag if the command given to --transform transforms the file in-place,
/// i.e. it modifies the original input file instead of writing to the standard output
/// or to a new file. This flag tells fclones to read the output from the file passed as $IN
/// after the transform command has exited.
#[structopt(long)]
pub in_place: bool,

/// Doesn't copy the file to a temporary location before transforming,
/// when $IN parameter is specified in the --transform command.
/// If this flag is present, $IN will point to the original file.
/// Caution:
/// this option may speed up processing, but it may cause loss of data because it lets
/// the transform command work directly on the original file.
#[structopt(long)]
pub no_copy: bool,

/// Searches for over-replicated files with replication factor above the specified value.
/// Specifying neither `--rf-over` nor `--rf-under` is equivalent to `--rf-over 1` which would
/// report duplicate files.
Expand Down Expand Up @@ -231,9 +251,17 @@ impl Config {
}

fn build_transform(&self, command: &str, log: &Log) -> io::Result<Transform> {
Transform::new(command.to_string()).map_err(|e| {
log.err(e);
exit(1);
})
match Transform::new(command.to_string(), self.in_place) {
Ok(mut tr) => {
if self.no_copy {
tr.copy = false
};
Ok(tr)
}
Err(e) => {
log.err(e);
exit(1);
}
}
}
}
136 changes: 114 additions & 22 deletions src/transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,42 @@ pub struct Output {
/// Some programs do not accept reading input from the stdin, but prefer to be pointed
/// to a file by a command-line option - in this case `Named` variant is used.
enum InputConf {
/// Pipe the input file from the given path to the stdin of the child.
/// The file at this path is opened and attached as the child's stdin
/// when the command is built.
StdIn(PathBuf),
/// Pass the original path to the file as $IN param.
/// Nothing is copied beforehand and nothing is removed afterwards.
Named(PathBuf),
/// Copy the original file to a temporary location and pass it as $IN param.
/// The first field is the path of the original file (the copy source);
/// the second field is the path of the temporary copy (the copy target),
/// which is what gets substituted for $IN and what `cleanup` removes.
Copied(PathBuf, PathBuf),
}

impl InputConf {
    /// Returns the path of the file the child process is pointed at.
    ///
    /// For `StdIn` and `Named` this is the path stored in the variant; for
    /// `Copied` it is the temporary copy (second field), not the source.
    fn input_path(&self) -> &PathBuf {
        match self {
            InputConf::Copied(_, tmp_copy) => tmp_copy,
            InputConf::StdIn(original) | InputConf::Named(original) => original,
        }
    }

    /// Makes the input file available before the child is launched.
    ///
    /// Only `Copied` needs any work: the source file is copied to the
    /// temporary target. The other variants succeed without touching the
    /// file system.
    ///
    /// # Errors
    /// Returns the I/O error reported by the copy, if any.
    fn prepare_input_file(&self) -> io::Result<()> {
        match self {
            InputConf::StdIn(_) | InputConf::Named(_) => Ok(()),
            // The number of bytes copied is of no interest to the caller.
            InputConf::Copied(original, tmp_copy) => {
                std::fs::copy(original, tmp_copy).map(|_| ())
            }
        }
    }

    /// Deletes the temporary copy, if this configuration has one.
    ///
    /// # Errors
    /// Returns the I/O error reported when removing the temporary file.
    fn cleanup(&self) -> io::Result<()> {
        match self {
            InputConf::Copied(_, tmp_copy) => std::fs::remove_file(tmp_copy),
            InputConf::StdIn(_) | InputConf::Named(_) => Ok(()),
        }
    }
}

/// Controls how we read data out from the child process.
Expand All @@ -47,37 +81,67 @@ enum OutputConf {
StdOut,
/// Send data through a named pipe
Named(PathBuf),
/// Read data from the same file as the input
InPlace(PathBuf),
}

/// Transforms files through an external program.
/// The `command_str` field contains a path to a program and its space separated arguments.
/// The command takes a file given in the `$IN` variable and produces an `$OUT` file.
pub struct Transform {
/// a path to a program and its space separated arguments
pub command_str: String,
pub program: String,
/// temporary directory for storing files and named pipes
pub tmp_dir: PathBuf,
/// copy the file into temporary directory before running the transform on it
pub copy: bool,
/// read output from the same location as the original
pub in_place: bool,
/// will be set to the name of the program, extracted from the command_str
program: String,
}

impl Transform {
pub fn new(command_str: String) -> io::Result<Transform> {
pub fn new(command_str: String, in_place: bool) -> io::Result<Transform> {
let has_in = RefCell::new(false);
let has_out = RefCell::new(false);
let parsed = Self::parse_command(&command_str, |s: &str| match s {
"OUT" if cfg!(windows) => {
*has_out.borrow_mut() = true;
OsString::from("$OUT")
}
_ => OsString::from(s),

let parsed = Self::parse_command(&command_str, |s: &str| {
match s {
"OUT" if cfg!(windows) => *has_out.borrow_mut() = true,
"IN" => *has_in.borrow_mut() = true,
_ => {}
};
OsString::from(s)
});

if cfg!(windows) && has_out.into_inner() {
let has_in = has_in.into_inner();
let has_out = has_out.into_inner();

if cfg!(windows) && has_out {
return Err(io::Error::new(
io::ErrorKind::Other,
"$OUT not supported on Windows yet",
));
}
if in_place && has_out {
return Err(io::Error::new(
io::ErrorKind::Other,
"$OUT conflicts with --in-place",
));
}
if in_place && !has_in {
return Err(io::Error::new(
io::ErrorKind::Other,
"$IN required with --in-place",
));
}

let program = match parsed.first() {
Some(p) => p.clone().into_string().unwrap(),
let program = parsed
.first()
.and_then(|p| PathBuf::from(p).file_name().map(|s| s.to_os_string()));
let program = match program {
Some(p) => p.into_string().unwrap(),
None => {
return Err(io::Error::new(
io::ErrorKind::Other,
Expand All @@ -103,6 +167,8 @@ impl Transform {
command_str,
program,
tmp_dir: Transform::create_temp_dir()?,
copy: has_in,
in_place,
})
}

Expand All @@ -122,6 +188,12 @@ impl Transform {
}
}

/// Builds a path inside the temporary directory with a freshly generated random name.
/// The name is a new v4 UUID rendered as 32 zero-padded lowercase hex digits.
fn random_tmp_file_name(&self) -> PathBuf {
    let file_name = format!("{:032x}", Uuid::new_v4().as_u128());
    self.tmp_dir.join(file_name)
}

/// Returns the output file path for the given input file path
pub fn output(&self, input: &Path) -> PathBuf {
self.tmp_dir.join(format!("{:x}", input.hash128()))
Expand Down Expand Up @@ -168,7 +240,9 @@ impl Transform {
pub fn run(&self, input: &Path) -> io::Result<Output> {
let (args, input_conf, output_conf) = self.make_args(&input);
let mut command = Self::build_command(&args, &input_conf, &output_conf)?;
Self::execute(&mut command, &output_conf)
let result = Self::execute(&mut command, &output_conf)?;
input_conf.cleanup()?;
Ok(result)
}

/// Creates arguments, input and output configuration for processing given input path.
Expand All @@ -178,6 +252,11 @@ impl Transform {
let output_conf = RefCell::<OutputConf>::new(OutputConf::StdOut);

let args = Self::parse_command(self.command_str.as_str(), |arg| match arg {
"IN" if self.copy => {
let tmp_target = self.random_tmp_file_name();
input_conf.replace(InputConf::Copied(input.to_path_buf(), tmp_target.clone()));
tmp_target.into_os_string()
}
"IN" => {
let input = input.to_path_buf();
input_conf.replace(InputConf::Named(input.clone()));
Expand All @@ -191,7 +270,14 @@ impl Transform {
_ => OsString::from(arg),
});

(args, input_conf.into_inner(), output_conf.into_inner())
let input_conf = input_conf.into_inner();
let mut output_conf = output_conf.into_inner();

if self.in_place {
output_conf = OutputConf::InPlace(input_conf.input_path().clone())
}

(args, input_conf, output_conf)
}

/// Builds the `Command` struct from the parsed arguments
Expand All @@ -205,8 +291,9 @@ impl Transform {
command.args(args);
command.stderr(Stdio::piped());

if let InputConf::StdIn(input_path) = input_conf {
command.stdin(File::open(&input_path)?);
input_conf.prepare_input_file()?;
if let InputConf::StdIn(_) = input_conf {
command.stdin(File::open(input_conf.input_path())?);
} else {
command.stdin(Stdio::null());
}
Expand Down Expand Up @@ -262,10 +349,15 @@ impl Transform {
str
});

let result = if let OutputConf::Named(output) = output_conf {
stream_hash(&mut File::open(output)?, FileLen::MAX, |_| {})
} else {
stream_hash(&mut child_out.unwrap(), FileLen::MAX, |_| {})
let result = match output_conf {
OutputConf::StdOut => stream_hash(&mut child_out.unwrap(), FileLen::MAX, |_| {}),
OutputConf::Named(output) => {
stream_hash(&mut File::open(output)?, FileLen::MAX, |_| {})
}
OutputConf::InPlace(output) => {
child.wait()?;
stream_hash(&mut File::open(output)?, FileLen::MAX, |_| {})
}
}?;

Ok(Output {
Expand Down Expand Up @@ -347,14 +439,14 @@ mod test {

#[test]
fn empty() {
assert!(Transform::new(String::from(" ")).is_err());
assert!(Transform::new(String::from(" "), false).is_err());
}

#[test]
#[cfg(unix)]
fn piped() {
with_dir("target/test/transform/piped/", |root| {
let p = Transform::new(String::from("dd")).unwrap();
let p = Transform::new(String::from("dd"), false).unwrap();
let input_path = root.join("input.txt");
let mut input = File::create(&input_path).unwrap();
let content = b"content";
Expand Down Expand Up @@ -382,7 +474,7 @@ mod test {
#[cfg(unix)]
fn parameterized() {
with_dir("target/test/transform/param/", |root| {
let p = Transform::new(String::from("dd if=$IN of=$OUT")).unwrap();
let p = Transform::new(String::from("dd if=$IN of=$OUT"), false).unwrap();
let input_path = root.join("input.txt");
let mut input = File::create(&input_path).unwrap();
let content = b"content";
Expand Down

0 comments on commit 489b51a

Please sign in to comment.