Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smarter (and looser) link equivalency logic #10079

Merged
merged 1 commit into from
Jul 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions news/10002.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
New resolver: Loosen URL comparison logic when checking for direct URL reference
equivalency. The logic includes the following notable characteristics:

* The authentication part of the URL is explicitly ignored.
* Most of the fragment part, including ``egg=``, is explicitly ignored. Only
``subdirectory=`` and hash values (e.g. ``sha256=``) are kept.
* The query part of the URL is parsed to allow ordering differences.
70 changes: 66 additions & 4 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import functools
import logging
import os
import posixpath
import re
import urllib.parse
from typing import TYPE_CHECKING, Optional, Tuple, Union
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union

from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
Expand All @@ -17,6 +19,11 @@
if TYPE_CHECKING:
from pip._internal.index.collector import HTMLPage

logger = logging.getLogger(__name__)


_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")


class Link(KeyBasedCompareMixin):
"""Represents a parsed link from a Package Index's simple URL
Expand Down Expand Up @@ -159,7 +166,7 @@ def subdirectory_fragment(self) -> Optional[str]:
return match.group(1)

_hash_re = re.compile(
r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
r'({choices})=([a-f0-9]+)'.format(choices="|".join(_SUPPORTED_HASHES))
)

@property
Expand Down Expand Up @@ -218,6 +225,61 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)


# TODO: Relax this comparison logic to ignore, for example, fragments.
class _CleanResult(NamedTuple):
"""Convert link for equivalency check.

This is used in the resolver to check whether two URL-specified requirements
likely point to the same distribution and can be considered equivalent. This
equivalency logic avoids comparing URLs literally, which can be too strict
(e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users.

Currently this does three things:

1. Drop the basic auth part. This is technically wrong since a server can
serve different content based on auth, but if it does that, it is even
impossible to guarantee two URLs without auth are equivalent, since
the user can input different auth information when prompted. So the
practical solution is to assume the auth doesn't affect the response.
2. Parse the query to avoid the ordering issue. Note that ordering under the
same key in the query are NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are
still considered different.
3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
hash values, since it should have no impact the downloaded content. Note
that this drops the "egg=" part historically used to denote the requested
project (and extras), which is wrong in the strictest sense, but too many
people are supplying it inconsistently to cause superfluous resolution
conflicts, so we choose to also ignore them.
"""

parsed: urllib.parse.SplitResult
query: Dict[str, List[str]]
subdirectory: str
hashes: Dict[str, str]

@classmethod
def from_link(cls, link: Link) -> "_CleanResult":
parsed = link._parsed_url
netloc = parsed.netloc.rsplit("@", 1)[-1]
fragment = urllib.parse.parse_qs(parsed.fragment)
if "egg" in fragment:
logger.debug("Ignoring egg= fragment in %s", link)
uranusjr marked this conversation as resolved.
Show resolved Hide resolved
try:
# If there are multiple subdirectory values, use the first one.
# This matches the behavior of Link.subdirectory_fragment.
subdirectory = fragment["subdirectory"][0]
except (IndexError, KeyError):
subdirectory = ""
# If there are multiple hash values under the same algorithm, use the
# first one. This matches the behavior of Link.hash_value.
hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
return cls(
parsed=parsed._replace(netloc=netloc, query="", fragment=""),
query=urllib.parse.parse_qs(parsed.query),
subdirectory=subdirectory,
hashes=hashes,
)


@functools.lru_cache(maxsize=None)
def links_equivalent(link1: Link, link2: Link) -> bool:
return link1 == link2
return _CleanResult.from_link(link1) == _CleanResult.from_link(link2)
13 changes: 2 additions & 11 deletions tests/functional/test_install_reqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,25 +423,16 @@ def test_constraints_constrain_to_local(script, data, resolver_variant):
assert 'Running setup.py install for singlemodule' in result.stdout


def test_constrained_to_url_install_same_url(script, data, resolver_variant):
def test_constrained_to_url_install_same_url(script, data):
to_install = data.src.joinpath("singlemodule")
constraints = path_to_url(to_install) + "#egg=singlemodule"
script.scratch_path.joinpath("constraints.txt").write_text(constraints)
result = script.pip(
'install', '--no-index', '-f', data.find_links, '-c',
script.scratch_path / 'constraints.txt', to_install,
allow_stderr_warning=True,
expect_error=(resolver_variant == "2020-resolver"),
)
if resolver_variant == "2020-resolver":
assert 'Cannot install singlemodule 0.0.1' in result.stderr, str(result)
assert (
'because these package versions have conflicting dependencies.'
in result.stderr
), str(result)
else:
assert ('Running setup.py install for singlemodule'
in result.stdout), str(result)
assert 'Running setup.py install for singlemodule' in result.stdout, str(result)


def test_double_install_spurious_hash_mismatch(
Expand Down
90 changes: 90 additions & 0 deletions tests/functional/test_new_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,96 @@ def test_new_resolver_avoids_incompatible_wheel_tags_in_constraint_url(
assert_not_installed(script, "dep")


@pytest.mark.parametrize(
"suffixes_equivalent, depend_suffix, request_suffix",
[
pytest.param(
True,
"#egg=foo",
"",
id="drop-depend-egg",
),
pytest.param(
True,
"",
"#egg=foo",
id="drop-request-egg",
),
pytest.param(
True,
"#subdirectory=bar&egg=foo",
"#subdirectory=bar&egg=bar",
id="drop-egg-only",
),
pytest.param(
True,
"#subdirectory=bar&egg=foo",
"#egg=foo&subdirectory=bar",
id="fragment-ordering",
),
pytest.param(
True,
"?a=1&b=2",
"?b=2&a=1",
id="query-opordering",
),
pytest.param(
False,
"#sha512=1234567890abcdef",
"#sha512=abcdef1234567890",
id="different-keys",
),
pytest.param(
False,
"#sha512=1234567890abcdef",
"#md5=1234567890abcdef",
id="different-values",
),
uranusjr marked this conversation as resolved.
Show resolved Hide resolved
pytest.param(
False,
"#subdirectory=bar&egg=foo",
"#subdirectory=rex",
id="drop-egg-still-different",
),
Comment on lines +1825 to +1830
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit-ish: Add another test for the reverse of this, and move this to the top with the other drop- tests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverse as in reversing subdirectory and egg, or the two values to arguments?

],
)
def test_new_resolver_direct_url_equivalent(
tmp_path,
script,
suffixes_equivalent,
depend_suffix,
request_suffix,
):
pkga = create_basic_wheel_for_package(script, name="pkga", version="1")
pkgb = create_basic_wheel_for_package(
script,
name="pkgb",
version="1",
depends=[f"pkga@{path_to_url(pkga)}{depend_suffix}"],
)

# Make pkgb visible via --find-links, but not pkga.
find_links = tmp_path.joinpath("find_links")
find_links.mkdir()
with open(pkgb, "rb") as f:
find_links.joinpath(pkgb.name).write_bytes(f.read())

# Install pkgb from --find-links, and pkga directly but from a different
# URL suffix as specified in pkgb. This should work!
script.pip(
"install",
"--no-cache-dir", "--no-index",
"--find-links", str(find_links),
f"{path_to_url(pkga)}{request_suffix}", "pkgb",
expect_error=(not suffixes_equivalent),
)

if suffixes_equivalent:
assert_installed(script, pkga="1", pkgb="1")
else:
assert_not_installed(script, "pkga", "pkgb")


def test_new_resolver_direct_url_with_extras(tmp_path, script):
pkg1 = create_basic_wheel_for_package(script, name="pkg1", version="1")
pkg2 = create_basic_wheel_for_package(
Expand Down
55 changes: 54 additions & 1 deletion tests/unit/test_link.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from pip._internal.models.link import Link
from pip._internal.models.link import Link, links_equivalent
from pip._internal.utils.hashes import Hashes


Expand Down Expand Up @@ -138,3 +138,56 @@ def test_is_hash_allowed__none_hashes(self, hashes, expected):
def test_is_vcs(self, url, expected):
link = Link(url)
assert link.is_vcs is expected


@pytest.mark.parametrize(
"url1, url2",
[
pytest.param(
"https://example.com/foo#egg=foo",
"https://example.com/foo",
id="drop-egg",
),
pytest.param(
"https://example.com/foo#subdirectory=bar&egg=foo",
"https://example.com/foo#subdirectory=bar&egg=bar",
id="drop-egg-only",
),
pytest.param(
"https://example.com/foo#subdirectory=bar&egg=foo",
"https://example.com/foo#egg=foo&subdirectory=bar",
id="fragment-ordering",
),
pytest.param(
"https://example.com/foo?a=1&b=2",
"https://example.com/foo?b=2&a=1",
id="query-opordering",
),
],
)
def test_links_equivalent(url1, url2):
assert links_equivalent(Link(url1), Link(url2))
uranusjr marked this conversation as resolved.
Show resolved Hide resolved


@pytest.mark.parametrize(
"url1, url2",
[
pytest.param(
"https://example.com/foo#sha512=1234567890abcdef",
"https://example.com/foo#sha512=abcdef1234567890",
id="different-keys",
),
pytest.param(
"https://example.com/foo#sha512=1234567890abcdef",
"https://example.com/foo#md5=1234567890abcdef",
id="different-values",
),
pytest.param(
"https://example.com/foo#subdirectory=bar&egg=foo",
"https://example.com/foo#subdirectory=rex",
id="drop-egg-still-different",
),
],
)
def test_links_equivalent_false(url1, url2):
assert not links_equivalent(Link(url1), Link(url2))
uranusjr marked this conversation as resolved.
Show resolved Hide resolved