From 795d32e6a62f7e07b1e4ca48f2e89183129e0445 Mon Sep 17 00:00:00 2001 From: Fridolin Pokorny Date: Wed, 25 Jan 2023 15:50:24 +0100 Subject: [PATCH 1/5] Always provide direct_url.json for wheel installations Signed-off-by: Fridolin Pokorny --- src/pip/_internal/req/req_install.py | 11 +++++++++-- src/pip/_internal/utils/direct_url_helpers.py | 6 +++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index bb38ec09da4..45d3f75b38f 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -23,6 +23,7 @@ from pip._internal.build_env import BuildEnvironment, NoOpBuildEnvironment from pip._internal.exceptions import InstallationError, LegacyInstallFailure from pip._internal.locations import get_scheme +from pip._internal.utils.misc import hash_file from pip._internal.metadata import ( BaseDistribution, get_default_environment, @@ -30,7 +31,7 @@ get_wheel_distribution, ) from pip._internal.metadata.base import FilesystemWheel -from pip._internal.models.direct_url import DirectUrl +from pip._internal.models.direct_url import (ArchiveInfo, DirectUrl) from pip._internal.models.link import Link from pip._internal.operations.build.metadata import generate_metadata from pip._internal.operations.build.metadata_editable import generate_editable_metadata @@ -783,7 +784,6 @@ def install( if self.is_wheel: assert self.local_file_path - direct_url = None # TODO this can be refactored to direct_url = self.download_info if self.editable: direct_url = direct_url_for_editable(self.unpacked_source_directory) @@ -793,6 +793,13 @@ def install( self.source_dir, self.original_link_is_in_wheel_cache, ) + else: + sha256 = hash_file(self.local_file_path)[0] + direct_url = DirectUrl( + url=self.download_info.redacted_url, + info=ArchiveInfo("sha256=" + sha256.hexdigest()), + ) + install_wheel( self.name, self.local_file_path, diff --git a/src/pip/_internal/utils/direct_url_helpers.py b/src/pip/_internal/utils/direct_url_helpers.py index 0e8e5e1608b..8ce0a4e40f3 100644 --- a/src/pip/_internal/utils/direct_url_helpers.py +++ b/src/pip/_internal/utils/direct_url_helpers.py @@ -2,6 +2,7 @@ from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo from pip._internal.models.link import Link +from pip._internal.utils.misc import hash_file from pip._internal.utils.urls import path_to_url from pip._internal.vcs import vcs @@ -76,10 +77,13 @@ def direct_url_from_link( subdirectory=link.subdirectory_fragment, ) else: - hash = None hash_name = link.hash_name if hash_name: hash = f"{hash_name}={link.hash}" + else: + sha256 = hash_file(link.path)[0] + hash = f"sha256={sha256.hexdigest()}" + return DirectUrl( url=link.url_without_fragment, info=ArchiveInfo(hash=hash), From 0013e5a33b9b838e27360df62005252f0d144b02 Mon Sep 17 00:00:00 2001 From: Fridolin Pokorny Date: Thu, 9 Mar 2023 13:50:26 +0100 Subject: [PATCH 2/5] Use provenance_url.json instead of direct_url.json for provenance --- src/pip/_internal/models/direct_url.py | 3 +++ src/pip/_internal/operations/install/wheel.py | 8 ++++++-- src/pip/_internal/req/req_install.py | 1 + src/pip/_internal/utils/direct_url_helpers.py | 3 +++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/models/direct_url.py b/src/pip/_internal/models/direct_url.py index e75feda9ca9..253fec2668c 100644 --- a/src/pip/_internal/models/direct_url.py +++ b/src/pip/_internal/models/direct_url.py @@ -15,6 +15,7 @@ T = TypeVar("T") DIRECT_URL_METADATA_NAME = "direct_url.json" +PROVENANCE_URL_METADATA_NAME = "provenance_url.json" ENV_VAR_RE = re.compile(r"^\$\{[A-Za-z0-9-_]+\}(:\$\{[A-Za-z0-9-_]+\})?$") @@ -144,10 +145,12 @@ def __init__( url: str, info: InfoType, subdirectory: Optional[str] = None, + provenance_file: bool = False ) -> None: self.url = url self.info = info self.subdirectory = subdirectory + self.provenance_file = provenance_file def _remove_auth_from_netloc(self, netloc: str) -> str: if "@" not in netloc: diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index c79941398a2..2643390aec0 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -47,7 +47,7 @@ FilesystemWheel, get_wheel_distribution, ) -from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl +from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, PROVENANCE_URL_METADATA_NAME, DirectUrl from pip._internal.models.scheme import SCHEME_KEYS, Scheme from pip._internal.utils.filesystem import adjacent_tmp_file, replace from pip._internal.utils.misc import captured_stdout, ensure_dir, hash_file, partition @@ -672,7 +672,11 @@ def _generate_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]: # Record the PEP 610 direct URL reference if direct_url is not None: - direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME) + if direct_url.provenance_file: + direct_url_path = os.path.join(dest_info_dir, PROVENANCE_URL_METADATA_NAME) + else: + direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME) + print(direct_url_path) with _generate_file(direct_url_path) as direct_url_file: direct_url_file.write(direct_url.to_json().encode("utf-8")) generated.append(direct_url_path) diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 45d3f75b38f..a7f43a3362a 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -798,6 +798,7 @@ def install( direct_url = DirectUrl( url=self.download_info.redacted_url, info=ArchiveInfo("sha256=" + sha256.hexdigest()), + provenance_file=True, ) install_wheel( diff --git a/src/pip/_internal/utils/direct_url_helpers.py b/src/pip/_internal/utils/direct_url_helpers.py index 8ce0a4e40f3..dae129fb99c 100644 --- a/src/pip/_internal/utils/direct_url_helpers.py +++ b/src/pip/_internal/utils/direct_url_helpers.py @@ -78,14 +78,17 @@ def direct_url_from_link( ) else: hash_name = link.hash_name + provenance_file = False if hash_name: hash = f"{hash_name}={link.hash}" else: sha256 = hash_file(link.path)[0] hash = f"sha256={sha256.hexdigest()}" + provenance_file = True return DirectUrl( url=link.url_without_fragment, info=ArchiveInfo(hash=hash), subdirectory=link.subdirectory_fragment, + provenance_file=provenance_file, ) From 47d25875ecca99c0c90df16ae9ab5868eb93e2d0 Mon Sep 17 00:00:00 2001 From: Fridolin Pokorny Date: Fri, 10 Mar 2023 16:18:47 +0100 Subject: [PATCH 3/5] Provide testsuite for pip provenance url Signed-off-by: Fridolin Pokorny --- src/pip/_internal/utils/direct_url_helpers.py | 8 +--- tests/functional/test_install_direct_url.py | 46 ++++++++++++++++++- tests/lib/direct_url.py | 11 +++-- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/pip/_internal/utils/direct_url_helpers.py b/src/pip/_internal/utils/direct_url_helpers.py index dae129fb99c..e810eee567b 100644 --- a/src/pip/_internal/utils/direct_url_helpers.py +++ b/src/pip/_internal/utils/direct_url_helpers.py @@ -77,18 +77,12 @@ def direct_url_from_link( subdirectory=link.subdirectory_fragment, ) else: + hash = None hash_name = link.hash_name - provenance_file = False if hash_name: hash = f"{hash_name}={link.hash}" - else: - sha256 = hash_file(link.path)[0] - hash = f"sha256={sha256.hexdigest()}" - provenance_file = True - return DirectUrl( url=link.url_without_fragment, info=ArchiveInfo(hash=hash), subdirectory=link.subdirectory_fragment, - provenance_file=provenance_file, ) diff --git a/tests/functional/test_install_direct_url.py b/tests/functional/test_install_direct_url.py index cd2a4aea75f..f669324c7db 100644 --- a/tests/functional/test_install_direct_url.py +++ b/tests/functional/test_install_direct_url.py @@ -1,6 +1,7 @@ +import shutil import pytest -from pip._internal.models.direct_url import VcsInfo +from pip._internal.models.direct_url import VcsInfo, ArchiveInfo from tests.lib import PipTestEnvironment, TestData, _create_test_package from tests.lib.direct_url import get_created_direct_url @@ -10,6 +11,12 @@ def test_install_find_links_no_direct_url(script: PipTestEnvironment) -> None: result = script.pip_install_local("simple") assert not get_created_direct_url(result, "simple") + provenance_url = get_created_direct_url(result, "simple", provenance_file=True) + assert provenance_url is not None + assert isinstance(provenance_url.info, ArchiveInfo) + assert provenance_url.url.startswith("file:///") + assert provenance_url.info.hash.startswith("sha256=") + @pytest.mark.usefixtures("with_wheel") def test_install_vcs_editable_no_direct_url(script: PipTestEnvironment) -> None: @@ -19,6 +26,7 @@ def test_install_vcs_editable_no_direct_url(script: PipTestEnvironment) -> None: # legacy editable installs do not generate .dist-info, # hence no direct_url.json assert not get_created_direct_url(result, "testpkg") + assert not get_created_direct_url(result, "testpkg", provenance_file=True) @pytest.mark.usefixtures("with_wheel") @@ -27,6 +35,7 @@ def test_install_vcs_non_editable_direct_url(script: PipTestEnvironment) -> None url = pkg_path.as_uri() args = ["install", f"git+{url}#egg=testpkg"] result = script.pip(*args) + assert not get_created_direct_url(result, "testpkg", provenance_file=True) direct_url = get_created_direct_url(result, "testpkg") assert direct_url assert direct_url.url == url @@ -40,6 +49,7 @@ def test_install_archive_direct_url(script: PipTestEnvironment, data: TestData) assert req.startswith("simple @ file://") result = script.pip("install", req) assert get_created_direct_url(result, "simple") + assert not get_created_direct_url(result, "simple", provenance_file=True) @pytest.mark.network @@ -53,6 +63,7 @@ def test_install_vcs_constraint_direct_url(script: PipTestEnvironment) -> None: ) result = script.pip("install", "pip-test-package", "-c", constraints_file) assert get_created_direct_url(result, "pip_test_package") + assert not get_created_direct_url(result, "pip_test_package", provenance_file=True) @pytest.mark.usefixtures("with_wheel") @@ -63,3 +74,36 @@ def test_install_vcs_constraint_direct_file_url(script: PipTestEnvironment) -> N constraints_file.write_text(f"git+{url}#egg=testpkg") result = script.pip("install", "testpkg", "-c", constraints_file) assert get_created_direct_url(result, "testpkg") + assert not get_created_direct_url(result, "testpkg", provenance_file=True) + + +@pytest.mark.network +@pytest.mark.usefixtures("with_wheel") +def test_install_provenance_url(script: PipTestEnvironment) -> None: + result = script.pip("install", "INITools==0.2") + assert not get_created_direct_url(result, "INITools") + provenance_url = get_created_direct_url(result, "INITools", provenance_file=True) + assert provenance_url is not None + assert isinstance(provenance_url.info, ArchiveInfo) + assert provenance_url.url.startswith("https://files.pythonhosted.org/packages/") + assert provenance_url.info.hash.startswith("sha256=") + + +@pytest.mark.usefixtures("with_wheel") +def test_install_find_links_provenance_url(script: PipTestEnvironment, data: TestData) -> None: + shutil.copy(data.packages / "simple-1.0.tar.gz", script.scratch_path) + html = script.scratch_path.joinpath("index.html") + html.write_text('') + result = script.pip( + "install", + "simple==1.0", + "--no-index", + "--find-links", + script.scratch_path, + ) + assert not get_created_direct_url(result, "simple") + provenance_url = get_created_direct_url(result, "simple", provenance_file=True) + assert provenance_url is not None + assert isinstance(provenance_url.info, ArchiveInfo) + assert provenance_url.url.startswith("file:///") + assert provenance_url.info.hash.startswith("sha256=") diff --git a/tests/lib/direct_url.py b/tests/lib/direct_url.py index e0dac032062..891c86367ba 100644 --- a/tests/lib/direct_url.py +++ b/tests/lib/direct_url.py @@ -3,13 +3,14 @@ from pathlib import Path from typing import Optional -from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl +from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, PROVENANCE_URL_METADATA_NAME, DirectUrl from tests.lib import TestPipResult -def get_created_direct_url_path(result: TestPipResult, pkg: str) -> Optional[Path]: +def get_created_direct_url_path(result: TestPipResult, pkg: str, provenance_file: bool = False) -> Optional[Path]: + url_file_name = PROVENANCE_URL_METADATA_NAME if provenance_file else DIRECT_URL_METADATA_NAME direct_url_metadata_re = re.compile( - pkg + r"-[\d\.]+\.dist-info." + DIRECT_URL_METADATA_NAME + r"$" + pkg + r"-[\d\.]+\.dist-info." + url_file_name + r"$" ) for filename in result.files_created: if direct_url_metadata_re.search(os.fspath(filename)): @@ -17,8 +18,8 @@ def get_created_direct_url_path(result: TestPipResult, pkg: str) -> Optional[Pat return None -def get_created_direct_url(result: TestPipResult, pkg: str) -> Optional[DirectUrl]: - direct_url_path = get_created_direct_url_path(result, pkg) +def get_created_direct_url(result: TestPipResult, pkg: str, *, provenance_file: bool = False) -> Optional[DirectUrl]: + direct_url_path = get_created_direct_url_path(result, pkg, provenance_file=provenance_file) if direct_url_path: with open(direct_url_path) as f: return DirectUrl.from_json(f.read()) From 3fcb5b8e19b6889f4e18005da58ee59fee20b41e Mon Sep 17 00:00:00 2001 From: Fridolin Pokorny Date: Fri, 10 Mar 2023 16:27:45 +0100 Subject: [PATCH 4/5] Add a temporary news entry Signed-off-by: Fridolin Pokorny --- news/9999.feature.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 news/9999.feature.rst diff --git a/news/9999.feature.rst b/news/9999.feature.rst new file mode 100644 index 00000000000..f1a7457ac33 --- /dev/null +++ b/news/9999.feature.rst @@ -0,0 +1,2 @@ +Implement storing provenance_url.json file as specified in PEP 9999. +This allows checking hashes of Python distributions installed from an index. From c29b3ecd791cdf0eeb58cb57296738ffa5b386d2 Mon Sep 17 00:00:00 2001 From: Fridolin Pokorny Date: Fri, 10 Mar 2023 16:30:18 +0100 Subject: [PATCH 5/5] Remove unused import Signed-off-by: Fridolin Pokorny --- src/pip/_internal/utils/direct_url_helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pip/_internal/utils/direct_url_helpers.py b/src/pip/_internal/utils/direct_url_helpers.py index e810eee567b..0e8e5e1608b 100644 --- a/src/pip/_internal/utils/direct_url_helpers.py +++ b/src/pip/_internal/utils/direct_url_helpers.py @@ -2,7 +2,6 @@ from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo from pip._internal.models.link import Link -from pip._internal.utils.misc import hash_file from pip._internal.utils.urls import path_to_url from pip._internal.vcs import vcs