pypa · cjerdonek · Apr 8, 2019 · Feb 7, 2019
diff --git a/news/6285.bugfix b/news/6285.bugfix
@@ -0,0 +1 @@
+Fix incorrect URL quoting of IPv6 addresses.
diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py
@@ -939,15 +939,41 @@ def _get_encoding_from_headers(headers):
     return None
 
 
-_CLEAN_LINK_RE = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
-
-
 def _clean_link(url):
     # type: (str) -> str
     """Makes sure a link is fully encoded. That is, if a ' ' shows up in
     the link, it will be rewritten to %20 (while not over-quoting
     % or other characters)."""
-    return _CLEAN_LINK_RE.sub(lambda match: '%%%2x' % ord(match.group(0)), url)
+    # Split the URL into parts according to the general structure
+    # `scheme://netloc/path;parameters?query#fragment`. Note that the
+    # `netloc` can be empty and the URL will then refer to a local
+    # filesystem path.
+    result = urllib_parse.urlparse(url)
+    is_local_path = result.netloc == ""
+
+    def clean_part(part):
+        # type: (str) -> str
+        # In both cases below we unquote prior to quoting to make sure
+        # nothing is double quoted.
+        if is_local_path:
+            # On Windows the path part might contain a drive letter which
+            # should not be quoted. On Linux where drive letters do not
+            # exist, the colon should be quoted. We rely on urllib.request
+            # to do the right thing here.
+            return urllib_request.pathname2url(
+                urllib_request.url2pathname(part))
+        return urllib_parse.quote(urllib_parse.unquote(part))
+
+    # `@` and `%2F` are passed through verbatim. The character class of
+    # the regex this replaces left both untouched, and unquoting `%2F`
+    # would turn an escaped slash into a real path separator. re.split()
+    # with a capturing group returns the separators at the odd indices,
+    # so only the even-indexed pieces are re-quoted.
+    pieces = re.split('(@|%2F)', result.path, flags=re.IGNORECASE)
+    path = ''.join(
+        piece if index % 2 else clean_part(piece)
+        for index, piece in enumerate(pieces))
+    return urllib_parse.urlunparse(result._replace(path=path))
 
 
 class HTMLPage(object):

diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
@@ -7,7 +7,7 @@
 
 from pip._internal.download import PipSession
 from pip._internal.index import (
- Link, PackageFinder, _determine_base_url, _egg_info_matches,
+ Link, PackageFinder, _clean_link, _determine_base_url, _egg_info_matches,
  _find_name_version_sep, _get_html_page,
 )
 
@@ -280,3 +280,65 @@ def test_request_retries(caplog):
  'Could not fetch URL http://localhost: Retry error - skipping'
  in caplog.text
  )
+
+
+@pytest.mark.parametrize(
+    ("url", "clean_url"),
+    [
+        # URL with hostname and port. Port separator should not be quoted.
+        ("https://localhost.localdomain:8181/path/with space/",
+         "https://localhost.localdomain:8181/path/with%20space/"),
+        # URL that is already properly quoted. The quoting `%`
+        # characters should not be quoted again.
+        ("https://localhost.localdomain:8181/path/with%20quoted%20space/",
+         "https://localhost.localdomain:8181/path/with%20quoted%20space/"),
+        # URL with IPv4 address and port.
+        ("https://127.0.0.1:8181/path/with space/",
+         "https://127.0.0.1:8181/path/with%20space/"),
+        # URL with IPv6 address and port. The `[]` brackets around the
+        # IPv6 address should not be quoted.
+        ("https://[fd00:0:0:236::100]:8181/path/with space/",
+         "https://[fd00:0:0:236::100]:8181/path/with%20space/"),
+        # URL with query. The leading `?` should not be quoted.
+        ("https://localhost.localdomain:8181/path/with/query?request=test",
+         "https://localhost.localdomain:8181/path/with/query?request=test"),
+        # URL with colon in the path portion.
+        ("https://localhost.localdomain:8181/path:/with:/colon",
+         "https://localhost.localdomain:8181/path%3A/with%3A/colon"),
+        # URL with something that looks like a drive letter, but is
+        # not. The `:` should be quoted.
+        ("https://localhost.localdomain/T:/path/",
+         "https://localhost.localdomain/T%3A/path/"),
+    ]
+)
+def test_clean_link(url, clean_url):
+    assert _clean_link(url) == clean_url
+
+
+@pytest.mark.parametrize(
+    ("url", "clean_url"),
+    [
+        # URL with Windows drive letter. The `:` after the drive
+        # letter should not be quoted. The trailing `/` should be
+        # removed.
+        ("file:///T:/path/with spaces/",
+         "file:///T:/path/with%20spaces"),
+    ]
+)
+@pytest.mark.skipif("sys.platform != 'win32'")
+def test_clean_link_windows(url, clean_url):
+    assert _clean_link(url) == clean_url
+
+
+@pytest.mark.parametrize(
+    ("url", "clean_url"),
+    [
+        # URL with Windows drive letter, running on non-windows
+        # platform. The `:` after the drive should be quoted.
+        ("file:///T:/path/with spaces/",
+         "file:///T%3A/path/with%20spaces/"),
+    ]
+)
+@pytest.mark.skipif("sys.platform == 'win32'")
+def test_clean_link_non_windows(url, clean_url):
+    assert _clean_link(url) == clean_url