From bb8824439ecd8dabe81d2724aedacb0870ccd653 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Wed, 6 Oct 2021 12:04:33 -0400 Subject: [PATCH 1/6] (joe) requirement creation is very expensive, and at least in my test case there were only ~200 unique requirement objects created in ~5-10 minutes of resolution time --- news/10550.feature.rst | 1 + src/pip/_internal/req/constructors.py | 14 ++++++++++---- src/pip/_internal/resolution/resolvelib/factory.py | 9 +++++---- tests/unit/test_req.py | 8 ++++++++ 4 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 news/10550.feature.rst diff --git a/news/10550.feature.rst b/news/10550.feature.rst new file mode 100644 index 00000000000..bd34476442b --- /dev/null +++ b/news/10550.feature.rst @@ -0,0 +1 @@ +Improve performance of dependency resolution. diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index 0a6b3367e1e..ef0a82bb80f 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -8,6 +8,7 @@ InstallRequirement. """ +import functools import logging import os import re @@ -39,6 +40,11 @@ operators = Specifier._operators.keys() +@functools.lru_cache(maxsize=None) +def get_or_create_requirement(req_string: str) -> Requirement: + return Requirement(req_string) + + def _strip_extras(path: str) -> Tuple[str, Optional[str]]: m = re.match(r"^(.+)(\[[^\]]+\])$", path) extras = None @@ -54,7 +60,7 @@ def _strip_extras(path: str) -> Tuple[str, Optional[str]]: def convert_extras(extras: Optional[str]) -> Set[str]: if not extras: return set() - return Requirement("placeholder" + extras.lower()).extras + return get_or_create_requirement("placeholder" + extras.lower()).extras def parse_editable(editable_req: str) -> Tuple[Optional[str], str, Set[str]]: @@ -83,7 +89,7 @@ def parse_editable(editable_req: str) -> Tuple[Optional[str], str, Set[str]]: return ( package_name, url_no_extras, - Requirement("placeholder" + extras.lower()).extras, + get_or_create_requirement("placeholder" + extras.lower()).extras, ) else: return package_name, url_no_extras, set() @@ -309,7 +315,7 @@ def with_source(text: str) -> str: def _parse_req_string(req_as_string: str) -> Requirement: try: - req = Requirement(req_as_string) + req = get_or_create_requirement(req_as_string) except InvalidRequirement: if os.path.sep in req_as_string: add_msg = "It looks like a path." @@ -386,7 +392,7 @@ def install_req_from_req_string( user_supplied: bool = False, ) -> InstallRequirement: try: - req = Requirement(req_string) + req = get_or_create_requirement(req_string) except InvalidRequirement: raise InstallationError(f"Invalid requirement: '{req_string}'") diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index e96764d31ea..13d47519db6 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -18,8 +18,9 @@ cast, ) -from pip._vendor.packaging.requirements import InvalidRequirement -from pip._vendor.packaging.requirements import Requirement as PackagingRequirement +from pip._vendor.packaging.requirements import ( + InvalidRequirement, +) from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.resolvelib import ResolutionImpossible @@ -38,7 +39,7 @@ from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.operations.prepare import RequirementPreparer -from pip._internal.req.constructors import install_req_from_link_and_ireq +from pip._internal.req.constructors import get_or_create_requirement, install_req_from_link_and_ireq from pip._internal.req.req_install import ( InstallRequirement, check_invalid_constraint_type, @@ -365,7 +366,7 @@ def find_candidates( # If the current identifier contains extras, add explicit candidates # from entries from extra-less identifier. with contextlib.suppress(InvalidRequirement): - parsed_requirement = PackagingRequirement(identifier) + parsed_requirement = get_or_create_requirement(identifier) explicit_candidates.update( self._iter_explicit_candidates_from_base( requirements.get(parsed_requirement.name, ()), diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index c4b97cf1c30..209a9ed9956 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -658,6 +658,14 @@ def test_parse_editable_local_extras( ) +def test_get_or_create_caching() -> None: + """test caching of get_or_create requirement""" + teststr = "affinegap==1.10" + assert get_or_create_requirement(teststr) == Requirement(teststr) + assert not (get_or_create_requirement(teststr) is Requirement(teststr)) + assert get_or_create_requirement(teststr) is get_or_create_requirement(teststr) + + def test_exclusive_environment_markers() -> None: """Make sure RequirementSet accepts several excluding env markers""" eq36 = install_req_from_line("Django>=1.6.10,<1.7 ; python_version == '3.6'") From bd87828eae4fb46a002a5d8aaada39e4716edfef Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Wed, 6 Oct 2021 12:48:48 -0400 Subject: [PATCH 2/6] (joe) black + imports --- src/pip/_internal/resolution/resolvelib/factory.py | 9 +++++---- tests/unit/test_req.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index 13d47519db6..7c0027cc930 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -18,9 +18,7 @@ cast, ) -from pip._vendor.packaging.requirements import ( - InvalidRequirement, -) +from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.resolvelib import ResolutionImpossible @@ -39,7 +37,10 @@ from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.operations.prepare import RequirementPreparer -from pip._internal.req.constructors import get_or_create_requirement, install_req_from_link_and_ireq +from pip._internal.req.constructors import ( + get_or_create_requirement, + install_req_from_link_and_ireq, +) from pip._internal.req.req_install import ( InstallRequirement, check_invalid_constraint_type, diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 209a9ed9956..cf58cdece45 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -28,6 +28,7 @@ from pip._internal.req.constructors import ( _get_url_from_path, _looks_like_path, + get_or_create_requirement, install_req_from_editable, install_req_from_line, install_req_from_parsed_requirement, From 0862d40d35cf8c1776964573f28cb0dfdcdb47a8 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Wed, 6 Oct 2021 13:04:23 -0400 Subject: [PATCH 3/6] (joe) work around no equality operator for requirement objects --- tests/unit/test_req.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index cf58cdece45..9c5161a1255 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -662,7 +662,12 @@ def test_parse_editable_local_extras( def test_get_or_create_caching() -> None: """test caching of get_or_create requirement""" teststr = "affinegap==1.10" - assert get_or_create_requirement(teststr) == Requirement(teststr) + from_helper = get_or_create_requirement(teststr) + freshly_made = Requirement(teststr) + # Requirement doesn't have an equality operator (yet) so test + # equality of attribute for list of attributes + for iattr in ["name", "url", "extras", "specifier", "marker"]: + assert getattr(from_helper, iattr) == getattr(freshly_made, iattr) assert not (get_or_create_requirement(teststr) is Requirement(teststr)) assert get_or_create_requirement(teststr) is get_or_create_requirement(teststr) From dee6690f068bab77e6014aea9b8523051e62e8a1 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Thu, 7 Oct 2021 07:11:33 -0400 Subject: [PATCH 4/6] Renames, moves, docstrings, comments --- src/pip/_internal/req/constructors.py | 15 +++++---------- .../_internal/resolution/resolvelib/factory.py | 8 +++----- src/pip/_internal/utils/packaging.py | 13 +++++++++++++ tests/unit/test_packaging.py | 16 +++++++++++++++- tests/unit/test_req.py | 14 -------------- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index ef0a82bb80f..5cf923515d7 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -8,7 +8,6 @@ InstallRequirement. """ -import functools import logging import os import re @@ -27,6 +26,7 @@ from pip._internal.req.req_install import InstallRequirement from pip._internal.utils.filetypes import is_archive_file from pip._internal.utils.misc import is_installable_dir +from pip._internal.utils.packaging import get_requirement from pip._internal.utils.urls import path_to_url from pip._internal.vcs import is_url, vcs @@ -40,11 +40,6 @@ operators = Specifier._operators.keys() -@functools.lru_cache(maxsize=None) -def get_or_create_requirement(req_string: str) -> Requirement: - return Requirement(req_string) - - def _strip_extras(path: str) -> Tuple[str, Optional[str]]: m = re.match(r"^(.+)(\[[^\]]+\])$", path) extras = None @@ -60,7 +55,7 @@ def _strip_extras(path: str) -> Tuple[str, Optional[str]]: def convert_extras(extras: Optional[str]) -> Set[str]: if not extras: return set() - return get_or_create_requirement("placeholder" + extras.lower()).extras + return get_requirement("placeholder" + extras.lower()).extras def parse_editable(editable_req: str) -> Tuple[Optional[str], str, Set[str]]: @@ -89,7 +84,7 @@ def parse_editable(editable_req: str) -> Tuple[Optional[str], str, Set[str]]: return ( package_name, url_no_extras, - get_or_create_requirement("placeholder" + extras.lower()).extras, + get_requirement("placeholder" + extras.lower()).extras, ) else: return package_name, url_no_extras, set() @@ -315,7 +310,7 @@ def with_source(text: str) -> str: def _parse_req_string(req_as_string: str) -> Requirement: try: - req = get_or_create_requirement(req_as_string) + req = get_requirement(req_as_string) except InvalidRequirement: if os.path.sep in req_as_string: add_msg = "It looks like a path." @@ -392,7 +387,7 @@ def install_req_from_req_string( user_supplied: bool = False, ) -> InstallRequirement: try: - req = get_or_create_requirement(req_string) + req = get_requirement(req_string) except InvalidRequirement: raise InstallationError(f"Invalid requirement: '{req_string}'") diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index 7c0027cc930..766dc26c0f9 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -37,10 +37,7 @@ from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.operations.prepare import RequirementPreparer -from pip._internal.req.constructors import ( - get_or_create_requirement, - install_req_from_link_and_ireq, -) +from pip._internal.req.constructors import install_req_from_link_and_ireq from pip._internal.req.req_install import ( InstallRequirement, check_invalid_constraint_type, @@ -48,6 +45,7 @@ from pip._internal.resolution.base import InstallRequirementProvider from pip._internal.utils.compatibility_tags import get_supported from pip._internal.utils.hashes import Hashes +from pip._internal.utils.packaging import get_requirement from pip._internal.utils.virtualenv import running_under_virtualenv from .base import Candidate, CandidateVersion, Constraint, Requirement @@ -367,7 +365,7 @@ def find_candidates( # If the current identifier contains extras, add explicit candidates # from entries from extra-less identifier. with contextlib.suppress(InvalidRequirement): - parsed_requirement = get_or_create_requirement(identifier) + parsed_requirement = get_requirement(identifier) explicit_candidates.update( self._iter_explicit_candidates_from_base( requirements.get(parsed_requirement.name, ()), diff --git a/src/pip/_internal/utils/packaging.py b/src/pip/_internal/utils/packaging.py index 9687c86aa3c..9a8f2fe24e7 100644 --- a/src/pip/_internal/utils/packaging.py +++ b/src/pip/_internal/utils/packaging.py @@ -1,3 +1,4 @@ +import functools import logging from email.message import Message from email.parser import FeedParser @@ -5,6 +6,7 @@ from pip._vendor import pkg_resources from pip._vendor.packaging import specifiers, version +from pip._vendor.packaging.requirements import Requirement from pip._vendor.pkg_resources import Distribution from pip._internal.exceptions import NoneMetadataError @@ -69,3 +71,14 @@ def get_installer(dist: Distribution) -> str: if line.strip(): return line.strip() return "" + + +@functools.lru_cache(maxsize=None) +def get_requirement(req_string: str) -> Requirement: + """Construct a packaging.Requirement object with caching""" + # Parsing requirement strings is expensive, and is also expected to happen + # with a low diversity of different arguments (at least relative the number + # constructed). This method adds a cache to requirement object creation to + # minimize repeated parsing of the same string to construct equivalent + # Requirement objects. + return Requirement(req_string) diff --git a/tests/unit/test_packaging.py b/tests/unit/test_packaging.py index 8750ca85338..8a4437ce20e 100644 --- a/tests/unit/test_packaging.py +++ b/tests/unit/test_packaging.py @@ -2,8 +2,9 @@ import pytest from pip._vendor.packaging import specifiers +from pip._vendor.packaging.requirements import Requirement -from pip._internal.utils.packaging import check_requires_python +from pip._internal.utils.packaging import check_requires_python, get_requirement @pytest.mark.parametrize( @@ -27,3 +28,16 @@ def test_check_requires_python__invalid() -> None: """ with pytest.raises(specifiers.InvalidSpecifier): check_requires_python("invalid", (3, 6, 5)) + + +def test_get_or_create_caching() -> None: + """test caching of get_or_create requirement""" + teststr = "affinegap==1.10" + from_helper = get_requirement(teststr) + freshly_made = Requirement(teststr) + # Requirement doesn't have an equality operator (yet) so test + # equality of attribute for list of attributes + for iattr in ["name", "url", "extras", "specifier", "marker"]: + assert getattr(from_helper, iattr) == getattr(freshly_made, iattr) + assert not (get_requirement(teststr) is Requirement(teststr)) + assert get_requirement(teststr) is get_requirement(teststr) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 9c5161a1255..c4b97cf1c30 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -28,7 +28,6 @@ from pip._internal.req.constructors import ( _get_url_from_path, _looks_like_path, - get_or_create_requirement, install_req_from_editable, install_req_from_line, install_req_from_parsed_requirement, @@ -659,19 +658,6 @@ def test_parse_editable_local_extras( ) -def test_get_or_create_caching() -> None: - """test caching of get_or_create requirement""" - teststr = "affinegap==1.10" - from_helper = get_or_create_requirement(teststr) - freshly_made = Requirement(teststr) - # Requirement doesn't have an equality operator (yet) so test - # equality of attribute for list of attributes - for iattr in ["name", "url", "extras", "specifier", "marker"]: - assert getattr(from_helper, iattr) == getattr(freshly_made, iattr) - assert not (get_or_create_requirement(teststr) is Requirement(teststr)) - assert get_or_create_requirement(teststr) is get_or_create_requirement(teststr) - - def test_exclusive_environment_markers() -> None: """Make sure RequirementSet accepts several excluding env markers""" eq36 = install_req_from_line("Django>=1.6.10,<1.7 ; python_version == '3.6'") From 98d8401035d46f95bfc08ae7a6c212210170f13a Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 8 Oct 2021 07:27:38 -0400 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Pradyun Gedam --- news/10550.feature.rst | 2 +- tests/unit/test_packaging.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/news/10550.feature.rst b/news/10550.feature.rst index bd34476442b..942fe58bcb1 100644 --- a/news/10550.feature.rst +++ b/news/10550.feature.rst @@ -1 +1 @@ -Improve performance of dependency resolution. +Cache requirement objects, to improve performance reducing reparses of requirement strings. diff --git a/tests/unit/test_packaging.py b/tests/unit/test_packaging.py index 8a4437ce20e..88277448c2c 100644 --- a/tests/unit/test_packaging.py +++ b/tests/unit/test_packaging.py @@ -35,9 +35,10 @@ def test_get_or_create_caching() -> None: teststr = "affinegap==1.10" from_helper = get_requirement(teststr) freshly_made = Requirement(teststr) + # Requirement doesn't have an equality operator (yet) so test # equality of attribute for list of attributes for iattr in ["name", "url", "extras", "specifier", "marker"]: assert getattr(from_helper, iattr) == getattr(freshly_made, iattr) - assert not (get_requirement(teststr) is Requirement(teststr)) + assert get_requirement(teststr) is not Requirement(teststr) assert get_requirement(teststr) is get_requirement(teststr) From 97327292c6e73ece657e52c0f765f4b743528258 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 8 Oct 2021 07:28:45 -0400 Subject: [PATCH 6/6] Set limited maxsize for requirement parsing cache --- src/pip/_internal/utils/packaging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/utils/packaging.py b/src/pip/_internal/utils/packaging.py index 9a8f2fe24e7..f100473e647 100644 --- a/src/pip/_internal/utils/packaging.py +++ b/src/pip/_internal/utils/packaging.py @@ -73,7 +73,7 @@ def get_installer(dist: Distribution) -> str: return "" -@functools.lru_cache(maxsize=None) +@functools.lru_cache(maxsize=512) def get_requirement(req_string: str) -> Requirement: """Construct a packaging.Requirement object with caching""" # Parsing requirement strings is expensive, and is also expected to happen