Skip to content

Commit

Permalink
contenthash: implement proper Linux symlink semantics for getFollowLinks
Browse files Browse the repository at this point in the history
This patch is part of a series which fixes the symlink resolution
semantics within BuildKit.

You cannot implement symlink resolution on Linux naively using
path.Join. A correct implementation requires tracking the current path
and applying each new component individually. This implementation is
loosely based on github.com/cyphar/filepath-securejoin.

Things to note:

 * The previous implementation of getFollowLinks actually only resolved
   symlinks in parent components of the path, leading to some callers to
   have to implement resolution manually (and incorrectly) by calling
   getFollowLinks several times. In addition to being incorrect and
   somewhat difficult to follow, it also lead to the ELOOP limit being
   much higher than 255 (while some callers used getFollowLinksWalk,
   most used getFollowLinks which reset the limit for each iteration).

   So, add getFollowParentLinks to allow for callers to decide which
   behaviour they need. getFollowLinks now follows all links
   (correctly).

 * The trailing-slash-is-significant behaviour in the cache (dir vs
   dir header) needs to be handled specially because on Linux there is
   no distinction between "a/" and "a" (assuming a is not a symlink,
   that is) and so filepath-securejoin's implementation didn't care
   about trailing slashes. The previous implementation hid the trailing
   path behaviour purely in the splitKey() implementation, making the
   need for this quite subtle.

 * The previous implementation was recursive, which in theory would
   allow you to find some paths slightly more quickly (if you find a
   valid ancestor you don't need to check above it) at the cost of
   making some lookups more expensive (a path with an invalid ancestor
   very early on in the path).

   However, implementing the correct lookup algorithm recursively proved
   to be quite difficult. It is possible to implement a similar
   optimisation (try to find the first non-symlink parent component and
   iterate from there), this complicates the implementation a fair
   amount and it doesn't seem clear that the performance tradeoff is a
   benefit in general.

   Ultimately, cache lookups are quite fast and so there probably isn't
   a practical performance difference between approaches.

Signed-off-by: Aleksa Sarai <[email protected]>
  • Loading branch information
cyphar committed May 2, 2024
1 parent bcacb3e commit 4fa7ea6
Showing 1 changed file with 88 additions and 56 deletions.
144 changes: 88 additions & 56 deletions cache/contenthash/checksum.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
// involves a symlink. That will match fsutil behavior of
// calling functions such as stat and walk.
var cr *CacheRecord
k, cr, err = getFollowLinks(root, k, true)
k, cr, err = getFollowParentLinks(root, k, true)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -751,36 +751,16 @@ func wildcardPrefix(root *iradix.Node, p string) (string, []byte, bool, error) {
return "", nil, false, nil
}

linksWalked := 0
k, cr, err := getFollowLinksWalk(root, convertPathToKey([]byte(d1)), true, &linksWalked)
// Only resolve the final symlink component if there are components in the
// wildcard segment.
resolveFn := getFollowParentLinks
if d2 != "" {
resolveFn = getFollowLinks
}
k, cr, err := resolveFn(root, convertPathToKey([]byte(d1)), true)
if err != nil {
return "", k, false, err
}

if d2 != "" && cr != nil && cr.Type == CacheRecordTypeSymlink {
// getFollowLinks only handles symlinks in path
// components before the last component, so
// handle last component in d1 specially.
resolved := string(convertKeyToPath(k))
for {
v, ok := root.Get(k)

if !ok {
return d1, k, false, nil
}
if v.(*CacheRecord).Type != CacheRecordTypeSymlink {
break
}

linksWalked++
if linksWalked > 255 {
return "", k, false, errors.Errorf("too many links")
}

resolved := cleanLink(resolved, v.(*CacheRecord).Linkname)
k = convertPathToKey([]byte(resolved))
}
}
return d1, k, cr != nil, nil
}

Expand Down Expand Up @@ -892,7 +872,7 @@ func (cc *cacheContext) lazyChecksum(ctx context.Context, m *mount, p string) (*

func (cc *cacheContext) checksum(ctx context.Context, root *iradix.Node, txn *iradix.Txn, m *mount, k []byte, follow bool) (*CacheRecord, bool, error) {
origk := k
k, cr, err := getFollowLinks(root, k, follow)
k, cr, err := getFollowParentLinks(root, k, follow)
if err != nil {
return nil, false, err
}
Expand Down Expand Up @@ -1071,12 +1051,10 @@ func (cc *cacheContext) scanPath(ctx context.Context, m *mount, p string) (retEr
return nil
}

func getFollowLinks(root *iradix.Node, k []byte, follow bool) ([]byte, *CacheRecord, error) {
var linksWalked int
return getFollowLinksWalk(root, k, follow, &linksWalked)
}

func getFollowLinksWalk(root *iradix.Node, k []byte, follow bool, linksWalked *int) ([]byte, *CacheRecord, error) {
// getFollowParentLinks is effectively O_PATH|O_NOFOLLOW, where the final
// component is looked up without doing any symlink resolution (if it is a
// symlink).
func getFollowParentLinks(root *iradix.Node, k []byte, follow bool) ([]byte, *CacheRecord, error) {
v, ok := root.Get(k)
if ok {
return k, v.(*CacheRecord), nil
Expand All @@ -1085,41 +1063,95 @@ func getFollowLinksWalk(root *iradix.Node, k []byte, follow bool, linksWalked *i
return k, nil, nil
}

// Only fully evaluate the parent path.
dir, file := splitKey(k)

k, parent, err := getFollowLinksWalk(root, dir, follow, linksWalked)
dir, _, err := getFollowLinks(root, dir, follow)
if err != nil {
return nil, nil, err
}
if parent != nil {
if parent.Type == CacheRecordTypeSymlink {
*linksWalked++
if *linksWalked > 255 {
return nil, nil, errors.Errorf("too many links")
}

link := cleanLink(string(convertKeyToPath(dir)), parent.Linkname)
return getFollowLinksWalk(root, append(convertPathToKey([]byte(link)), file...), follow, linksWalked)
}
}
k = append(k, file...)
// Do a direct lookup of the final component.
k = append(dir, file...)
v, ok = root.Get(k)
if ok {
return k, v.(*CacheRecord), nil
}
return k, nil, nil
}

func cleanLink(dir, linkname string) string {
dirPath := path.Clean(dir)
if dirPath == "." || dirPath == "/" {
dirPath = ""
func getFollowLinks(root *iradix.Node, k []byte, follow bool) ([]byte, *CacheRecord, error) {
v, ok := root.Get(k)
if ok && v.(*CacheRecord).Type != CacheRecordTypeSymlink {
return k, v.(*CacheRecord), nil
}
if !follow || len(k) == 0 {
return k, nil, nil
}

var (
currentPath = "/"
remainingPath = string(convertKeyToPath(k))
linksWalked int
cr *CacheRecord
)
// Trailing slashes are significant for the cache, but path.Clean strips
// them. We only care about the slash for the final lookup.
remainingPath, hadTrailingSlash := strings.CutSuffix(remainingPath, "/")
for remainingPath != "" {
// Get next component.
var part string
if i := strings.IndexRune(remainingPath, '/'); i == -1 {
part, remainingPath = remainingPath, ""
} else {
part, remainingPath = remainingPath[:i], remainingPath[i+1:]
}

// Apply the component to the path. Since it is a single component, and
// our current path contains no symlinks, we can just apply it
// leixically.
nextPath := path.Join("/", currentPath, part)
if nextPath == "/" {
// If we hit the root, just treat it as a directory.
currentPath = "/"
continue
}
if nextPath == currentPath {
// noop component
continue
}

cr = nil
v, ok := root.Get(convertPathToKey([]byte(nextPath)))
if ok {
cr = v.(*CacheRecord)
}
if !ok || cr.Type != CacheRecordTypeSymlink {
currentPath = nextPath
continue
}

linksWalked++
if linksWalked > maxSymlinkLimit {
return nil, nil, errTooManyLinks
}

remainingPath = cr.Linkname + "/" + remainingPath
if path.IsAbs(cr.Linkname) {
currentPath = "/"
}
}
link := path.Clean(linkname)
if !path.IsAbs(link) {
return path.Join("/", path.Join(path.Dir(dirPath), link))
// We've already looked up the final component. However, if there was a
// trailing slash in the original path, we need to do the lookup again with
// the slash applied.
if hadTrailingSlash {
cr = nil
currentPath += "/"
v, ok := root.Get(convertPathToKey([]byte(currentPath)))
if ok {
cr = v.(*CacheRecord)
}
}
return link
return convertPathToKey([]byte(currentPath)), cr, nil
}

func prepareDigest(fp, p string, fi os.FileInfo) (digest.Digest, error) {
Expand Down

0 comments on commit 4fa7ea6

Please sign in to comment.