Skip to content

Commit

Permalink
cp: fix the result of inodes are not the same when preserve links is …
Browse files Browse the repository at this point in the history
…flagged (#5064)

Should fix:
```
rm -rf a b c
touch a
ln -s a b
mkdir c
./target/debug/coreutils cp --preserve=links -R -H a b c
a_inode=$(ls -i c/a|sed 's,c/.*,,')
b_inode=$(ls -i c/b|sed 's,c/.*,,')
echo "$a_inode" = "$b_inode"
```
  • Loading branch information
tommady authored Sep 24, 2023
1 parent e2e42ac commit bd0fb81
Show file tree
Hide file tree
Showing 3 changed files with 296 additions and 118 deletions.
57 changes: 28 additions & 29 deletions src/uu/cp/src/copydir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! See the [`copy_directory`] function for more information.
#[cfg(windows)]
use std::borrow::Cow;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::env;
use std::fs;
use std::io;
Expand All @@ -24,8 +24,8 @@ use uucore::uio_error;
use walkdir::{DirEntry, WalkDir};

use crate::{
aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, preserve_hardlinks,
CopyResult, Error, Options,
aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, CopyResult, Error,
Options,
};

/// Ensure a Windows path starts with a `\\?`.
Expand Down Expand Up @@ -200,7 +200,7 @@ fn copy_direntry(
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
preserve_hard_links: bool,
hard_links: &mut Vec<(String, u64)>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
) -> CopyResult<()> {
let Entry {
source_absolute,
Expand Down Expand Up @@ -240,30 +240,27 @@ fn copy_direntry(
// If the source is not a directory, then we need to copy the file.
if !source_absolute.is_dir() {
if preserve_hard_links {
let dest = local_to_target.as_path().to_path_buf();
let found_hard_link = preserve_hardlinks(hard_links, &source_absolute, &dest)?;
if !found_hard_link {
match copy_file(
progress_bar,
&source_absolute,
local_to_target.as_path(),
options,
symlinked_files,
false,
) {
Ok(_) => Ok(()),
Err(err) => {
if source_absolute.is_symlink() {
// silent the error with a symlink
// In case we do --archive, we might copy the symlink
// before the file itself
Ok(())
} else {
Err(err)
}
match copy_file(
progress_bar,
&source_absolute,
local_to_target.as_path(),
options,
symlinked_files,
copied_files,
false,
) {
Ok(_) => Ok(()),
Err(err) => {
if source_absolute.is_symlink() {
// silent the error with a symlink
// In case we do --archive, we might copy the symlink
// before the file itself
Ok(())
} else {
Err(err)
}
}?;
}
}
}?;
} else {
// At this point, `path` is just a plain old file.
// Terminate this function immediately if there is any
Expand All @@ -277,6 +274,7 @@ fn copy_direntry(
local_to_target.as_path(),
options,
symlinked_files,
copied_files,
false,
) {
Ok(_) => {}
Expand Down Expand Up @@ -310,6 +308,7 @@ pub(crate) fn copy_directory(
target: &Path,
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
source_in_command_line: bool,
) -> CopyResult<()> {
if !options.recursive {
Expand All @@ -324,6 +323,7 @@ pub(crate) fn copy_directory(
target,
options,
symlinked_files,
copied_files,
source_in_command_line,
);
}
Expand Down Expand Up @@ -372,7 +372,6 @@ pub(crate) fn copy_directory(
};
let target = tmp.as_path();

let mut hard_links: Vec<(String, u64)> = vec![];
let preserve_hard_links = options.preserve_hard_links();

// Collect some paths here that are invariant during the traversal
Expand All @@ -397,7 +396,7 @@ pub(crate) fn copy_directory(
options,
symlinked_files,
preserve_hard_links,
&mut hard_links,
copied_files,
)?;
}
// Print an error message, but continue traversing the directory.
Expand Down
158 changes: 70 additions & 88 deletions src/uu/cp/src/cp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
use quick_error::quick_error;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::env;
#[cfg(not(windows))]
use std::ffi::CString;
Expand Down Expand Up @@ -1106,67 +1106,6 @@ fn parse_path_args(
Ok((paths, target))
}

/// Get the inode information for a file.
fn get_inode(file_info: &FileInformation) -> u64 {
#[cfg(unix)]
let result = file_info.inode();
#[cfg(windows)]
let result = file_info.file_index();
result
}

#[cfg(target_os = "redox")]
fn preserve_hardlinks(
hard_links: &mut Vec<(String, u64)>,
source: &std::path::Path,
dest: &std::path::Path,
found_hard_link: &mut bool,
) -> CopyResult<()> {
// Redox does not currently support hard links
Ok(())
}

/// Hard link a pair of files if needed _and_ record if this pair is a new hard link.
#[cfg(not(target_os = "redox"))]
fn preserve_hardlinks(
hard_links: &mut Vec<(String, u64)>,
source: &std::path::Path,
dest: &std::path::Path,
) -> CopyResult<bool> {
let info = FileInformation::from_path(source, false)
.context(format!("cannot stat {}", source.quote()))?;
let inode = get_inode(&info);
let nlinks = info.number_of_links();
let mut found_hard_link = false;
#[allow(clippy::explicit_iter_loop)]
for (link, link_inode) in hard_links.iter() {
if *link_inode == inode {
// Consider the following files:
//
// * `src/f` - a regular file
// * `src/link` - a hard link to `src/f`
// * `dest/src/f` - a different regular file
//
// In this scenario, if we do `cp -a src/ dest/`, it is
// possible that the order of traversal causes `src/link`
// to get copied first (to `dest/src/link`). In that case,
// in order to make sure `dest/src/link` is a hard link to
// `dest/src/f` and `dest/src/f` has the contents of
// `src/f`, we delete the existing file to allow the hard
// linking.
if file_or_link_exists(dest) && file_or_link_exists(Path::new(link)) {
std::fs::remove_file(dest)?;
}
std::fs::hard_link(link, dest).unwrap();
found_hard_link = true;
}
}
if !found_hard_link && nlinks > 1 {
hard_links.push((dest.to_str().unwrap().to_string(), inode));
}
Ok(found_hard_link)
}

/// When handling errors, we don't always want to show them to the user. This function handles that.
fn show_error_if_needed(error: &Error) {
match error {
Expand Down Expand Up @@ -1195,14 +1134,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult
let target_type = TargetType::determine(sources, target);
verify_target_type(target, &target_type)?;

let preserve_hard_links = options.preserve_hard_links();

let mut hard_links: Vec<(String, u64)> = vec![];

let mut non_fatal_errors = false;
let mut seen_sources = HashSet::with_capacity(sources.len());
let mut symlinked_files = HashSet::new();

// to remember the copied files for further usage.
// the FileInformation implemented the Hash trait by using
// 1. inode number
// 2. device number
// the combination of a file's inode number and device number is unique throughout all the file systems.
//
// key is the source file's information and the value is the destination filepath.
let mut copied_files: HashMap<FileInformation, PathBuf> = HashMap::with_capacity(sources.len());

let progress_bar = if options.progress_bar {
let pb = ProgressBar::new(disk_usage(sources, options.recursive)?)
.with_style(
Expand All @@ -1222,28 +1166,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult
if seen_sources.contains(source) {
// FIXME: compare sources by the actual file they point to, not their path. (e.g. dir/file == dir/../dir/file in most cases)
show_warning!("source {} specified more than once", source.quote());
} else {
let found_hard_link = if preserve_hard_links && !source.is_dir() {
let dest = construct_dest_path(source, target, &target_type, options)?;
preserve_hardlinks(&mut hard_links, source, &dest)?
} else {
false
};
if !found_hard_link {
if let Err(error) = copy_source(
&progress_bar,
source,
target,
&target_type,
options,
&mut symlinked_files,
) {
show_error_if_needed(&error);
non_fatal_errors = true;
}
}
seen_sources.insert(source);
} else if let Err(error) = copy_source(
&progress_bar,
source,
target,
&target_type,
options,
&mut symlinked_files,
&mut copied_files,
) {
show_error_if_needed(&error);
non_fatal_errors = true;
}
seen_sources.insert(source);
}

if let Some(pb) = progress_bar {
Expand Down Expand Up @@ -1295,11 +1230,20 @@ fn copy_source(
target_type: &TargetType,
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
) -> CopyResult<()> {
let source_path = Path::new(&source);
if source_path.is_dir() {
// Copy as directory
copy_directory(progress_bar, source, target, options, symlinked_files, true)
copy_directory(
progress_bar,
source,
target,
options,
symlinked_files,
copied_files,
true,
)
} else {
// Copy as file
let dest = construct_dest_path(source_path, target, target_type, options)?;
Expand All @@ -1309,6 +1253,7 @@ fn copy_source(
dest.as_path(),
options,
symlinked_files,
copied_files,
true,
);
if options.parents {
Expand Down Expand Up @@ -1570,6 +1515,24 @@ fn handle_existing_dest(
OverwriteMode::Clobber(ClobberMode::RemoveDestination) => {
fs::remove_file(dest)?;
}
OverwriteMode::Clobber(ClobberMode::Standard) => {
// Consider the following files:
//
// * `src/f` - a regular file
// * `src/link` - a hard link to `src/f`
// * `dest/src/f` - a different regular file
//
// In this scenario, if we do `cp -a src/ dest/`, it is
// possible that the order of traversal causes `src/link`
// to get copied first (to `dest/src/link`). In that case,
// in order to make sure `dest/src/link` is a hard link to
// `dest/src/f` and `dest/src/f` has the contents of
// `src/f`, we delete the existing file to allow the hard
// linking.
if options.preserve_hard_links() {
fs::remove_file(dest)?;
}
}
_ => (),
};

Expand Down Expand Up @@ -1643,6 +1606,7 @@ fn copy_file(
dest: &Path,
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
source_in_command_line: bool,
) -> CopyResult<()> {
if (options.update == UpdateMode::ReplaceIfOlder || options.update == UpdateMode::ReplaceNone)
Expand Down Expand Up @@ -1686,6 +1650,19 @@ fn copy_file(
handle_existing_dest(source, dest, options, source_in_command_line)?;
}

if options.preserve_hard_links() {
// if we encounter a matching device/inode pair in the source tree
// we can arrange to create a hard link between the corresponding names
// in the destination tree.
if let Some(new_source) = copied_files.get(
&FileInformation::from_path(source, options.dereference(source_in_command_line))
.context(format!("cannot stat {}", source.quote()))?,
) {
std::fs::hard_link(new_source, dest)?;
return Ok(());
};
}

if options.verbose {
if let Some(pb) = progress_bar {
// Suspend (hide) the progress bar so the println won't overlap with the progress bar.
Expand Down Expand Up @@ -1873,6 +1850,11 @@ fn copy_file(

copy_attributes(source, dest, &options.attributes)?;

copied_files.insert(
FileInformation::from_path(source, options.dereference(source_in_command_line))?,
dest.to_path_buf(),
);

if let Some(progress_bar) = progress_bar {
progress_bar.inc(fs::metadata(source)?.len());
}
Expand Down
Loading

0 comments on commit bd0fb81

Please sign in to comment.