From b9e22fc139c520a1339bd31b9abc7c537d9d1623 Mon Sep 17 00:00:00 2001 From: Nick Santos Date: Thu, 15 Jun 2023 19:49:55 -0400 Subject: [PATCH] walk: avoid stat()'ing files unnecessarily Before: ``` go test -bench=. . goos: linux goarch: amd64 pkg: github.com/tonistiigi/fsutil cpu: Intel(R) Core(TM) i7-10610U CPU @ 1.80GHz BenchmarkWalker/[1]-target-8 27994 41254 ns/op BenchmarkWalker/[1]-**/target-8 29540 40402 ns/op BenchmarkWalker/[2]-*/target-8 1017 1104834 ns/op BenchmarkWalker/[2]-**/target-8 920 1124731 ns/op BenchmarkWalker/[3]-*/*/target-8 34 40524094 ns/op BenchmarkWalker/[3]-**/target-8 32 40432908 ns/op BenchmarkWalker/[4]-*/*/*/target-8 31 44836714 ns/op BenchmarkWalker/[4]-**/target-8 26 44611379 ns/op BenchmarkWalker/[5]-*/*/*/*/target-8 85 14179975 ns/op BenchmarkWalker/[5]-**/target-8 78 13620031 ns/op BenchmarkWalker/[6]-*/*/*/*/*/target-8 44 23380013 ns/op BenchmarkWalker/[6]-**/target-8 51 22435264 ns/op BenchmarkWalker/[6]-**-!*/*/**-8 2101 580936 ns/op ``` After: ``` go test -bench=. . goos: linux goarch: amd64 pkg: github.com/tonistiigi/fsutil cpu: Intel(R) Core(TM) i7-10610U CPU @ 1.80GHz BenchmarkWalker/[1]-target-8 40788 26219 ns/op BenchmarkWalker/[1]-**/target-8 41642 26998 ns/op BenchmarkWalker/[2]-*/target-8 1725 660271 ns/op BenchmarkWalker/[2]-**/target-8 1806 645525 ns/op BenchmarkWalker/[3]-*/*/target-8 64 17609101 ns/op BenchmarkWalker/[3]-**/target-8 66 17563334 ns/op BenchmarkWalker/[4]-*/*/*/target-8 40 26241578 ns/op BenchmarkWalker/[4]-**/target-8 43 24575370 ns/op BenchmarkWalker/[5]-*/*/*/*/target-8 121 9564283 ns/op BenchmarkWalker/[5]-**/target-8 126 9412483 ns/op BenchmarkWalker/[6]-*/*/*/*/*/target-8 63 18703020 ns/op BenchmarkWalker/[6]-**/target-8 73 17287584 ns/op BenchmarkWalker/[6]-**-!*/*/**-8 3792 273148 ns/op ``` Signed-off-by: Nick Santos --- walker.go | 25 ++++++++++++++++++++----- walker_test.go | 27 +++++++++++++++++++++------ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/walker.go b/walker.go index f95101f..545f5e9 100644 --- a/walker.go +++ b/walker.go @@ -2,6 +2,7 @@ package fsutil import ( "context" + gofs "io/fs" "os" "path/filepath" "strings" @@ -47,11 +48,11 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err if err != nil { return errors.WithStack(&os.PathError{Op: "resolve", Path: root, Err: err}) } - fi, err := os.Stat(root) + rootFI, err := os.Stat(root) if err != nil { return errors.WithStack(err) } - if !fi.IsDir() { + if !rootFI.IsDir() { return errors.WithStack(&os.PathError{Op: "walk", Path: root, Err: syscall.ENOTDIR}) } @@ -126,7 +127,7 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err var parentDirs []visitedDir seenFiles := make(map[uint64]string) - return filepath.Walk(root, func(path string, fi os.FileInfo, walkErr error) (retErr error) { + return filepath.WalkDir(root, func(path string, dirEntry gofs.DirEntry, walkErr error) (retErr error) { defer func() { if retErr != nil && isNotExist(retErr) { retErr = filepath.SkipDir @@ -146,9 +147,10 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err var ( dir visitedDir isDir bool + fi gofs.FileInfo ) - if fi != nil { - isDir = fi.IsDir() + if dirEntry != nil { + isDir = dirEntry.IsDir() } if includeMatcher != nil || excludeMatcher != nil { @@ -161,6 +163,11 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err } if isDir { + fi, err = dirEntry.Info() + if err != nil { + return err + } + dir = visitedDir{ fi: fi, path: path, @@ -268,6 +275,14 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err dir.calledFn = true + // The FileInfo might have already been read further up. + if fi == nil { + fi, err = dirEntry.Info() + if err != nil { + return err + } + } + stat, err := mkstat(origpath, path, fi, seenFiles) if err != nil { return err diff --git a/walker_test.go b/walker_test.go index bcc89bb..cb4ea87 100644 --- a/walker_test.go +++ b/walker_test.go @@ -411,6 +411,7 @@ func BenchmarkWalker(b *testing.B) { for _, scenario := range []struct { maxDepth int pattern string + exclude string expected int }{{ maxDepth: 1, @@ -460,9 +461,18 @@ func BenchmarkWalker(b *testing.B) { maxDepth: 6, pattern: "**/target", expected: 2388, + }, { + maxDepth: 6, + pattern: "**", + exclude: "*/*/**", + expected: 20, }} { scenario := scenario // copy loop var - b.Run(fmt.Sprintf("[%d]-%s", scenario.maxDepth, scenario.pattern), func(b *testing.B) { + suffix := "" + if scenario.exclude != "" { + suffix = fmt.Sprintf("-!%s", scenario.exclude) + } + b.Run(fmt.Sprintf("[%d]-%s%s", scenario.maxDepth, scenario.pattern, suffix), func(b *testing.B) { tmpdir, err := os.MkdirTemp("", "walk") if err != nil { b.Error(err) @@ -478,12 +488,17 @@ func BenchmarkWalker(b *testing.B) { for i := 0; i < b.N; i++ { count := 0 - err = Walk(context.Background(), tmpdir, &WalkOpt{ + walkOpt := &WalkOpt{ IncludePatterns: []string{scenario.pattern}, - }, func(path string, fi os.FileInfo, err error) error { - count++ - return nil - }) + } + if scenario.exclude != "" { + walkOpt.ExcludePatterns = []string{scenario.exclude} + } + err = Walk(context.Background(), tmpdir, walkOpt, + func(path string, fi os.FileInfo, err error) error { + count++ + return nil + }) if err != nil { b.Error(err) }