Smarter (and looser) link equivalency logic

pypa · Jun 18, 2021 · c55d17c · c55d17c
1 parent 7c3abcc
commit c55d17c
Showing 1 changed file with 46 additions and 3 deletions.
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
@@ -2,7 +2,7 @@
 import posixpath
 import re
 import urllib.parse
-from typing import TYPE_CHECKING, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
 
 from pip._internal.utils.filetypes import WHEEL_EXTENSION
 from pip._internal.utils.hashes import Hashes
@@ -242,7 +242,50 @@ def is_hash_allowed(self, hashes):
  return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
 
 
-# TODO: Relax this comparison logic to ignore, for example, fragments.
+class _CleanResult(NamedTuple):
+ """Convert link for equivalency check.
+
+ This is used in the resolver to check whether two URL-specified requirements
+ likely point to the same distribution and can be considered equivalent. This
+ equivalency logic avoids comparing URLs literally, which can be too strict
+ (e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts users do not expect.
+
+ Currently this does three things:
+
+ 1. Drop the basic auth part. This is technically wrong since a server can
+ serve different content based on auth, but if it does that, it is not even
+ possible to guarantee two URLs without auth are equivalent, since
+ the user can input different auth information when prompted. So the
+ practical solution is to assume the auth doesn't affect the response.
+ 2. Parse the query to avoid the ordering issue.
+ 3. Parse the fragment, and explicitly drop the "egg=" part since it is
+ commonly provided as the project name for compatibility. This is wrong in
+ the strictest sense, but too many people are doing it.
+
+ Note that the ordering of values under the same key in query and fragment is
+ NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are still considered different.
+ """
+
+ parsed: urllib.parse.SplitResult
+ query: Dict[str, List[str]]
+ fragment: Dict[str, List[str]]
+
+ @classmethod
+ def from_link(cls, link: Link) -> "_CleanResult":
+ parsed = link._parsed_url
+ netloc = parsed.netloc.rsplit("@", 1)[-1]
+ # The fragment does not necessarily use the query string format
+ # (it's a pip-specific syntax), so we set keep_blank_values to keep
+ # a fragment that's not a key-value pair (e.g. "#title_1").
+ frag_qs = urllib.parse.parse_qs(parsed.fragment, keep_blank_values=True)
+ frag_qs.pop("egg", None)
+ return _CleanResult(
+ parsed=parsed._replace(netloc=netloc, query="", fragment=""),
+ query=urllib.parse.parse_qs(parsed.query),
+ fragment=frag_qs,
+ )
+
+
 def links_equivalent(link1, link2):
  # type: (Link, Link) -> bool
- return link1 == link2
+ return _CleanResult.from_link(link1) == _CleanResult.from_link(link2)