Skip to content

Commit

Permalink
Auto merge of #90596 - the8472:path-hash-opt, r=Mark-Simulacrum
Browse files Browse the repository at this point in the history
Optimize Eq and Hash for Path/PathBuf

```
# new

test path::tests::bench_hash_path_long                            ... bench:          86 ns/iter (+/- 1)
test path::tests::bench_hash_path_short                           ... bench:          13 ns/iter (+/- 1)
test path::tests::bench_path_hashset                              ... bench:         197 ns/iter (+/- 6)
test path::tests::bench_path_hashset_miss                         ... bench:          94 ns/iter (+/- 4)

# old

test path::tests::bench_hash_path_long                            ... bench:         192 ns/iter (+/- 2)
test path::tests::bench_hash_path_short                           ... bench:          33 ns/iter (+/- 1)
test path::tests::bench_path_hashset                              ... bench:       1,121 ns/iter (+/- 24)
test path::tests::bench_path_hashset_miss                         ... bench:         273 ns/iter (+/- 6)
```
  • Loading branch information
bors committed Nov 14, 2021
2 parents 3b2c454 + c1ea7bd commit c8e9497
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 11 deletions.
70 changes: 61 additions & 9 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,25 @@ impl FusedIterator for Components<'_> {}
impl<'a> cmp::PartialEq for Components<'a> {
#[inline]
fn eq(&self, other: &Components<'a>) -> bool {
let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self;

// Fast path for exact matches, e.g. for hashmap lookups.
// Don't explicitly compare the prefix or has_physical_root fields since they'll
// either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`.
if self.path.len() == other.path.len()
&& self.front == other.front
&& self.back == State::Body
&& other.back == State::Body
&& self.prefix_verbatim() == other.prefix_verbatim()
{
// possible future improvement: this could bail out earlier if there were a
// reverse memcmp/bcmp comparing back to front
if self.path == other.path {
return true;
}
}

// compare back to front since absolute paths often share long prefixes
Iterator::eq(self.clone().rev(), other.clone().rev())
}
}
Expand Down Expand Up @@ -1013,13 +1032,12 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
// The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
// the middle of one
if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
// this might benefit from a [u8]::first_mismatch simd implementation, if it existed
let first_difference =
match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
None => left.path.len().min(right.path.len()),
Some(diff) => diff,
};
// possible future improvement: a [u8]::first_mismatch simd implementation
let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) {
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
None => left.path.len().min(right.path.len()),
Some(diff) => diff,
};

if let Some(previous_sep) =
left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
Expand Down Expand Up @@ -2873,9 +2891,43 @@ impl cmp::PartialEq for Path {
#[stable(feature = "rust1", since = "1.0.0")]
impl Hash for Path {
fn hash<H: Hasher>(&self, h: &mut H) {
for component in self.components() {
component.hash(h);
let bytes = self.as_u8_slice();
let prefix_len = match parse_prefix(&self.inner) {
Some(prefix) => {
prefix.hash(h);
prefix.len()
}
None => 0,
};
let bytes = &bytes[prefix_len..];

let mut component_start = 0;
let mut bytes_hashed = 0;

for i in 0..bytes.len() {
if is_sep_byte(bytes[i]) {
if i > component_start {
let to_hash = &bytes[component_start..i];
h.write(to_hash);
bytes_hashed += to_hash.len();
}

// skip over separator and optionally a following CurDir item
// since components() would normalize these away
component_start = i + match bytes[i..] {
[_, b'.', b'/', ..] | [_, b'.'] => 2,
_ => 1,
};
}
}

if component_start < bytes.len() {
let to_hash = &bytes[component_start..];
h.write(to_hash);
bytes_hashed += to_hash.len();
}

h.write_usize(bytes_hashed);
}
}

Expand Down
80 changes: 78 additions & 2 deletions library/std/src/path/tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use super::*;

use crate::collections::BTreeSet;
use crate::collections::hash_map::DefaultHasher;
use crate::collections::{BTreeSet, HashSet};
use crate::hash::Hasher;
use crate::rc::Rc;
use crate::sync::Arc;
use core::hint::black_box;
Expand Down Expand Up @@ -1632,7 +1634,25 @@ fn into_rc() {
fn test_ord() {
macro_rules! ord(
($ord:ident, $left:expr, $right:expr) => ( {
assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
use core::cmp::Ordering;

let left = Path::new($left);
let right = Path::new($right);
assert_eq!(left.cmp(&right), Ordering::$ord);
if (core::cmp::Ordering::$ord == Ordering::Equal) {
assert_eq!(left, right);

let mut hasher = DefaultHasher::new();
left.hash(&mut hasher);
let left_hash = hasher.finish();
hasher = DefaultHasher::new();
right.hash(&mut hasher);
let right_hash = hasher.finish();

assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right);
} else {
assert_ne!(left, right);
}
});
);

Expand Down Expand Up @@ -1693,3 +1713,59 @@ fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
set.insert(paths[500].as_path());
});
}

#[bench]
fn bench_path_hashset(b: &mut test::Bencher) {
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
let paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();

let mut set = HashSet::new();

paths.iter().for_each(|p| {
set.insert(p.as_path());
});

b.iter(|| {
set.remove(paths[500].as_path());
set.insert(black_box(paths[500].as_path()))
});
}

#[bench]
fn bench_path_hashset_miss(b: &mut test::Bencher) {
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
let paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();

let mut set = HashSet::new();

paths.iter().for_each(|p| {
set.insert(p.as_path());
});

let probe = PathBuf::from(prefix).join("other");

b.iter(|| set.remove(black_box(probe.as_path())));
}

#[bench]
fn bench_hash_path_short(b: &mut test::Bencher) {
let mut hasher = DefaultHasher::new();
let path = Path::new("explorer.exe");

b.iter(|| black_box(path).hash(&mut hasher));

black_box(hasher.finish());
}

#[bench]
fn bench_hash_path_long(b: &mut test::Bencher) {
let mut hasher = DefaultHasher::new();
let path =
Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff");

b.iter(|| black_box(path).hash(&mut hasher));

black_box(hasher.finish());
}
1 change: 1 addition & 0 deletions library/std/src/sys/unix/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub fn is_verbatim_sep(b: u8) -> bool {
b == b'/'
}

#[inline]
pub fn parse_prefix(_: &OsStr) -> Option<Prefix<'_>> {
None
}
Expand Down

0 comments on commit c8e9497

Please sign in to comment.