Coverage for src/gitlabracadabra/packages/pypi.py: 80%
119 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-10 17:02 +0100
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-10 17:02 +0100
1#
2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Lesser General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
17from __future__ import annotations
19from html import unescape
20from logging import getLogger
21from posixpath import join as posixpath_join
22from typing import TYPE_CHECKING, Any
23from urllib.parse import quote as urlquote
24from urllib.parse import urljoin, urlparse, urlunparse
25from urllib.request import parse_keqv_list
27from html5lib import parse as html5lib_parse
28from packaging.requirements import InvalidRequirement, Requirement
29from packaging.utils import canonicalize_name
30from packaging.version import InvalidVersion, Version
31from requests import codes
33from gitlabracadabra.packages.package_file import PackageFile
34from gitlabracadabra.packages.pip import extract_version_from_fragment
35from gitlabracadabra.packages.source import Source
37if TYPE_CHECKING: 37 ↛ 38line 37 didn't jump to line 38 because the condition on line 37 was never true
38 from requests.models import Response
40try:
41 from packaging.utils import parse_wheel_filename
43 HAS_PACKAGING_PARSERS = True
44except ImportError: # packaging << 20.9
45 HAS_PACKAGING_PARSERS = False
47logger = getLogger(__name__)
class PyPI(Source):
    """PyPI repository.

    Resolves a list of requirement strings against a "simple" index and
    returns, for each requirement, the package files of the highest version
    that satisfies its specifier.
    """

    def __init__(
        self,
        *,
        log_prefix: str = "",
        index_url: str | None = None,
        requirements: str | list[str],
    ) -> None:
        """Initialize a PyPI repository object.

        Args:
            log_prefix: Log prefix.
            index_url: index-url (default to https://pypi.org/simple).
            requirements: Python requirements as list or string.
        """
        super().__init__()
        self._log_prefix = log_prefix
        self._index_url = index_url or "https://pypi.org/simple"
        if isinstance(requirements, str):
            self._requirements = requirements.splitlines()
        else:
            # Each list item may itself hold several lines.
            self._requirements = [req for reqs in requirements for req in reqs.splitlines()]

    def __str__(self) -> str:
        """Return string representation.

        Returns:
            A string.
        """
        return "PyPI repository"

    @property
    def package_files(self) -> list[PackageFile]:
        """Return list of package files.

        Blank lines and lines starting with ``#`` are skipped. A warning is
        logged for every other requirement that yields no package file.

        Returns:
            List of package files.
        """
        package_files: list[PackageFile] = []
        if not HAS_PACKAGING_PARSERS:
            logger.error(
                "%sPyPI packages mirroring requires packaging >= 20.9",
                self._log_prefix,
            )
            return package_files
        for requirement_string in self._requirements:
            stripped = requirement_string.strip()
            # Blank lines are not requirements: skip them silently instead of
            # reporting them as invalid.
            if not stripped or stripped.startswith("#"):
                continue
            package_files_from_requirement_string = self._package_files_from_requirement_string(requirement_string)
            if not package_files_from_requirement_string:
                logger.warning(
                    "%sNo package files matching found for requirement: %s",
                    self._log_prefix,
                    requirement_string,
                )
            package_files.extend(package_files_from_requirement_string)
        return package_files

    def _package_files_from_requirement_string(self, requirement_string: str) -> list[PackageFile]:
        """Parse a requirement string and return its matching package files.

        Args:
            requirement_string: One requirement line.

        Returns:
            Matching package files, or an empty list (after logging a warning)
            when the string is not a valid requirement.
        """
        try:
            req = Requirement(requirement_string)
        except InvalidRequirement:
            logger.warning(
                '%sInvalid requirement "%s"',
                self._log_prefix,
                requirement_string,
            )
            return []
        return self._package_files_from_requirement(req)

    def _package_files_from_requirement(self, req: Requirement) -> list[PackageFile]:
        """Fetch the project index page and return the matching package files.

        Args:
            req: Parsed requirement.

        Returns:
            Matching package files, or an empty list (after logging a warning)
            when the index does not answer with HTTP 200.
        """
        index_url = self._get_index_url(req.name)
        index_response = self.session.request("get", index_url)
        if index_response.status_code != codes["ok"]:
            logger.warning(
                "%sUnexpected HTTP status for PyPI index %s: received %i %s",
                self._log_prefix,
                index_url,
                index_response.status_code,
                index_response.reason,
            )
            return []
        return self._package_files_from_requirement_and_response(req, index_response)

    def _get_index_url(self, project_name: str) -> str:
        """Return the index page URL of a project, with a trailing slash.

        Args:
            project_name: Project name (canonicalized and URL-quoted here).

        Returns:
            URL of the project page below the configured index URL.
        """
        loc = posixpath_join(
            self._index_url,
            urlquote(canonicalize_name(project_name)),
        )
        if not loc.endswith("/"):
            loc = f"{loc}/"
        return loc

    def _package_files_from_requirement_and_response(
        self,
        req: Requirement,
        response: Response,
    ) -> list[PackageFile]:
        """Return the files of the highest matching version listed in a page.

        Args:
            req: Parsed requirement.
            response: HTTP response holding the HTML project page.

        Returns:
            Package files of the highest version matching the requirement,
            or an empty list when no anchor matches.
        """
        document = html5lib_parse(
            response.content,
            transport_encoding=response.encoding,
            namespaceHTMLElements=False,
        )

        base_url = self._get_base_url(response, document)

        package_files: dict[Version, list[PackageFile]] = {}
        for anchor in document.findall(".//a"):
            version, package_file = self._package_file_from_requirement_and_anchor(req, anchor, base_url)
            if version and package_file:
                package_files.setdefault(version, []).append(package_file)

        if not package_files:
            return []
        # Only the highest version is needed: no need to sort all of them.
        return package_files[max(package_files)]

    def _get_base_url(self, response: Response, document: Any) -> str:
        """Return the URL against which anchors of the page are resolved.

        Args:
            response: HTTP response of the page.
            document: Parsed HTML document.

        Returns:
            The href of the first <base> element having one, else the
            response URL.
        """
        base_url = response.url
        for base in document.findall(".//base"):
            href = base.get("href")
            if href is not None:
                base_url = href
                break
        return base_url

    def _package_file_from_requirement_and_anchor(
        self,
        req: Requirement,
        anchor: Any,
        base_url: str,
    ) -> tuple[Version | None, PackageFile | None]:
        """Build a package file from an anchor if it matches the requirement.

        Args:
            req: Parsed requirement.
            anchor: <a> element of the project page.
            base_url: URL used to resolve a relative href.

        Returns:
            A (version, package file) tuple, or (None, None) when the anchor
            has no href, is yanked, cannot be parsed, or does not satisfy the
            requirement specifier.
        """
        href = anchor.get("href")
        if href is None:
            return None, None
        # PEP 592: the mere presence of data-yanked marks the file as yanked;
        # the attribute may be empty when no reason was given.
        is_yanked = anchor.get("data-yanked") is not None
        if is_yanked and not str(req.specifier).startswith("=="):
            return None, None

        parsed_url = urlparse(urljoin(base_url, href))

        filename = parsed_url.path.split("/")[-1]
        try:
            name, ver = self._parse_filename(filename, canonicalize_name(req.name))
        except InvalidVersion:
            # Ignore invalid versions, like in pbr-0.5.2.5.g5b3e942.tar.gz
            logger.debug(
                "%sIgnoring invalid version for filename %s",
                self._log_prefix,
                filename,
            )
            return None, None
        except ValueError:
            # packaging raises InvalidWheelFilename (a ValueError subclass) for
            # malformed wheel names: one bad link must not abort the mirroring.
            logger.debug(
                "%sIgnoring invalid filename %s",
                self._log_prefix,
                filename,
            )
            return None, None

        if name is None or ver is None or ver not in req.specifier:
            return None, None

        # parse_keqv_list() fails on items without "=" or without a value,
        # which is what a link lacking a "#hash=..." fragment produces.
        fragment_items = [item for item in parsed_url.fragment.split("&") if item.partition("=")[2]]
        metadata = parse_keqv_list(fragment_items)

        requires_python = anchor.get("data-requires-python")
        if requires_python is not None:
            metadata["requires-python"] = unescape(requires_python)

        return ver, PackageFile(
            urlunparse(parsed_url._replace(fragment="")),
            "pypi",
            name,
            str(ver),
            filename,
            metadata=metadata,
        )

    def _parse_filename(self, filename: str, canonical_name: str) -> tuple[str | None, Version | None]:
        """Extract the project name and version from a distribution filename.

        Args:
            filename: Last path component of the file URL.
            canonical_name: Canonicalized name of the required project.

        Returns:
            A (name, version) tuple, or (None, None) for egg files, for
            unsupported extensions and for .tar.gz names from which no
            version could be extracted.

        Raises:
            InvalidVersion: If the extracted version string is not valid.
        """
        if filename.endswith(".whl"):
            name, ver, _, _ = parse_wheel_filename(filename)
            return name, ver
        if filename.endswith(".egg"):
            # Ignore egg files for now
            return None, None
        if filename.endswith(".tar.gz"):
            # Strip the 7-character ".tar.gz" suffix before extracting.
            ver_str = extract_version_from_fragment(filename[:-7], canonical_name)
            if ver_str:
                return canonical_name, Version(ver_str)
        return None, None