Coverage for src/gitlabracadabra/containers/with_digest.py: 86%

138 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-10 17:02 +0100

1# 

2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com> 

3# 

4# This program is free software: you can redistribute it and/or modify 

5# it under the terms of the GNU Lesser General Public License as published by 

6# the Free Software Foundation, either version 3 of the License, or 

7# (at your option) any later version. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program. If not, see <http://www.gnu.org/licenses/>. 

16 

17from __future__ import annotations 

18 

19from hashlib import sha256 

20from logging import getLogger 

21from os.path import getsize, isfile 

22from shutil import copy, copyfileobj 

23from tempfile import NamedTemporaryFile 

24from typing import IO, TYPE_CHECKING, BinaryIO 

25from urllib.parse import quote 

26 

27from requests import HTTPError, Response, codes 

28 

29from gitlabracadabra.containers.const import DIGEST_HEADER, DOCKER_MANIFEST_SCHEMA1_SIGNED 

30from gitlabracadabra.containers.scope import PULL, Scope 

31from gitlabracadabra.disk_cache import cache_dir 

32 

33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34 because the condition on line 33 was never true

34 from typing import Self 

35 

36 from gitlabracadabra.containers.registry_importer import RegistryImporter 

37 

38 

39logger = getLogger(__name__) 

40 

41 

class WithDigest:
    """An object with a digest.

    The digest, size and MIME type are retrieved lazily from the registry
    the first time they are needed. The content itself is downloaded on
    demand into the local ``containers_cache`` directory and can be read by
    using the instance as a context manager.

    ``registry_path`` must be implemented by subclasses.
    """

    # MIME types sent as the ``accept`` argument of registry requests.
    supported_mime_types: tuple[str, ...] | None = None

    def __init__(
        self,
        registry: RegistryImporter,
        manifest_name: str,
        digest: str | None = None,
        *,
        size: int | None = None,
        mime_type: str | None = None,
    ) -> None:
        """Initialize an object with a digest.

        Args:
            registry: Registry.
            manifest_name: Manifest name (Example: library/debian).
            digest: Digest (Example: sha256:5890f8ba95f680c87fcf89e51190098641b4f646102ce7ca906e7f83c84874dc).
            size: Size (Example: 42).
            mime_type: Content-Type / mediaType.
        """
        self._registry = registry
        self._manifest_name = manifest_name
        self._digest = digest
        self._size = size
        self._mime_type = mime_type
        # Tri-state cache for exists(): None means "not checked yet".
        self._exists: bool | None = None
        # Handle on the cached file, only set between __enter__ and __exit__.
        self._fd: BinaryIO | None = None
        # HTTP method used by _retrieve() when the content is not requested.
        # NOTE(review): the attribute name is misspelled ("mehod"); it is kept
        # as-is because code outside this class may set it - confirm before
        # renaming.
        self._retrieve_mehod = "head"
        self.forced_digest = False

    def __eq__(self, other: object) -> bool:
        """Compare with another object.

        Defining ``__eq__`` without ``__hash__`` makes instances unhashable.

        Args:
            other: Object to compare with.

        Returns:
            True if the type of one object is the same as, or a subclass of,
            the type of the other one and all instance attributes are equal.
        """
        if not (isinstance(self, type(other)) or isinstance(other, type(self))):
            return False
        # isinstance(self, type(other)) also holds for base-class instances
        # such as a plain object(), which have no __dict__: treat them as
        # different instead of raising AttributeError.
        other_attributes = getattr(other, "__dict__", None)
        if other_attributes is None:
            return False
        return self.__dict__ == other_attributes

    @property
    def registry(self) -> RegistryImporter:
        """Get the registry.

        Returns:
            The registry.
        """
        return self._registry

    @property
    def manifest_name(self) -> str:
        """Get the manifest name.

        Returns:
            The manifest name.
        """
        return self._manifest_name

    @property
    def digest(self) -> str:
        """Get the digest, retrieving it from the registry when unknown.

        Returns:
            The digest.

        Raises:
            ValueError: Unable to get digest.
        """
        if self._digest is None:
            self._retrieve()
        if self._digest is None:
            msg = "Unable to get digest"
            raise ValueError(msg)
        return self._digest

    @property
    def size(self) -> int:
        """Get the size.

        When unknown, the size of the locally cached file is used, and the
        registry is queried only if there is no cached file.

        Returns:
            The size.

        Raises:
            ValueError: Unable to get size.
        """
        if self._size is None:
            try:
                self._size = getsize(self.cache_path)
            except FileNotFoundError:
                self._retrieve()
        if self._size is None:
            msg = "Unable to get size"
            raise ValueError(msg)
        return self._size

    @property
    def mime_type(self) -> str | None:
        """Get the MIME type (mediaType), retrieving it when unknown.

        Returns:
            The MIME type.
        """
        if self._mime_type is None:
            self._retrieve()
        return self._mime_type

    @property
    def cache_path(self) -> str:
        """Get the cache path (local).

        The file name is the percent-encoded digest, so reading this property
        may retrieve the digest from the registry.

        Returns:
            Local path.
        """
        return str(cache_dir("containers_cache") / quote(self.digest, safe=""))

    @property
    def registry_path(self) -> str:
        """Get the registry path.

        Raises:
            NotImplementedError: Needs to be implemented in subclasses.
        """
        raise NotImplementedError

    def __enter__(self) -> Self:
        """Open the cached file, downloading it first if needed.

        Returns:
            self.

        Raises:
            RuntimeError: File already opened.
        """
        self._ensure_cached()
        if self._fd is not None:
            msg = "File already opened"
            raise RuntimeError(msg)
        # The handle outlives this method and is closed in __exit__.
        self._fd = open(self.cache_path, "rb")  # noqa: SIM115
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore
        """Close the cached file.

        Args:
            exc_type: Exception type.
            exc_val: Exception value.
            exc_tb: Exception traceback.
        """
        if self._fd is not None:
            self._fd.close()
            self._fd = None

    def read(self, n: int = -1) -> bytes:
        """Read the cached file.

        Args:
            n: buffer size.

        Returns:
            Bytes.

        Raises:
            ValueError: File is not opened.
        """
        if self._fd is None:
            msg = "File is not opened"
            raise ValueError(msg)
        return self._fd.read(n)

    def scope(self, actions: str = PULL) -> Scope:
        """Get a scope for the manifest name.

        Args:
            actions: Scope action.

        Returns:
            A scope.
        """
        return Scope(self.manifest_name, actions)

    def exists(self) -> bool:
        """Get Blob/Manifest existence in the associated registry.

        The registry is queried once; the answer is then kept on the
        instance. A "not found" response means the object does not exist.

        Returns:
            True or False.

        Raises:
            HTTPError: Error when fetching existence.
        """
        if self._exists is None:
            try:
                self._retrieve()
                self._exists = True
            except HTTPError as err:
                # Only "not found" means "does not exist"; anything else is
                # a real error.
                if (err.response is None) or (err.response.status_code != codes["not_found"]):
                    raise
                self._exists = False
        # NOTE(review): the original indentation was lost in the reviewed
        # copy; register() is assumed to run on every call for an existing
        # object, not only on the first check - confirm.
        if self._exists:
            self.register()
        return self._exists

    def register(self) -> None:
        """Notify the registry that the Digest exists."""
        # Overridden in Blob

    def _ensure_cached(self) -> None:
        """Download the content unless it is already in the local cache."""
        if self._digest is None or not isfile(self.cache_path):
            self._retrieve(with_content=True)

    def _retrieve(self, *, with_content: bool = False) -> None:
        """Retrieve metadata, and optionally the content, from the registry.

        Updates the digest (when not already known), the MIME type and the
        size from the response headers. The body is downloaded and verified
        whenever the request method is not "head".

        Args:
            with_content: Force a "get" request to download the content.

        Raises:
            ValueError: The digest header does not match the known digest.
        """
        method = "get" if with_content else self._retrieve_mehod
        with self._request(method) as response:
            retrieved_digest = response.headers.get(DIGEST_HEADER)
            if self._digest is None:
                self._digest = retrieved_digest
            elif retrieved_digest is not None and self._digest != retrieved_digest:
                msg = f"Retrieved digest does not match {retrieved_digest} != {self._digest}"
                raise ValueError(msg)
            if "Content-Type" in response.headers:
                self._mime_type = response.headers.get("Content-Type")
            # Content-Length may be missing (e.g. chunked responses): keep
            # the current size in that case instead of failing with KeyError.
            # The size property falls back to the cached file size.
            content_length = response.headers.get("Content-Length")
            if content_length is not None:
                self._size = int(content_length)
            if method != "head":
                # Must happen while the streamed response is still open.
                self._download_and_verify(response)

    def _request(self, method: str) -> Response:
        """Send a streaming request for this object to the registry.

        Args:
            method: HTTP method.

        Returns:
            The response.
        """
        return self.registry.request(
            method,
            self.registry_path,
            scopes={self.scope()},
            accept=self.supported_mime_types,
            stream=True,
        )

    def _download_and_verify(self, response: Response) -> None:
        """Store the response body in the local cache after a digest check.

        The body is first written to a temporary file in the cache directory,
        and copied to its final path only once its digest has been computed
        (and verified when a digest was already known).

        Args:
            response: Streamed response whose raw body is downloaded.
        """
        with NamedTemporaryFile(dir=cache_dir("containers_cache")) as fp:
            copyfileobj(response.raw, fp)
            downloaded_digest = self._compute_digest(fp)
            if self._digest is None:
                self._digest = downloaded_digest
            else:
                self._verify_digest(downloaded_digest)
            # The temporary file is removed when leaving the with block.
            copy(fp.name, self.cache_path)

    def _verify_digest(self, digest: str) -> None:
        """Check a computed digest against the expected one.

        Args:
            digest: Digest computed from the downloaded content.

        Raises:
            ValueError: Checksum mismatch (except for signed schema 1
                manifests, where the mismatch is only logged).
        """
        if digest == self._digest:
            return
        if self._mime_type == DOCKER_MANIFEST_SCHEMA1_SIGNED:
            # https://docs.docker.com/registry/spec/api/#content-digests
            # "manifest body without the signature content, also known as the JWS payload"
            logger.info(
                "Ignoring checksum mismatch for signed manifest %s: %s ! %s",
                str(self),
                digest,
                self._digest,
            )
        else:
            msg = f"Checksum mismatch: {digest} != {self._digest}"
            raise ValueError(msg)

    def _compute_digest(self, fp: IO[bytes]) -> str:
        """Compute the SHA-256 digest of a whole binary file.

        Args:
            fp: Seekable binary file; it is rewound before reading.

        Returns:
            The digest, prefixed with "sha256:".
        """
        sha256_hash = sha256()
        buf_len = 4096
        fp.seek(0)
        # Read by blocks so the whole content never has to fit in memory.
        for byte_block in iter(lambda: fp.read(buf_len), b""):
            sha256_hash.update(byte_block)
        return f"sha256:{sha256_hash.hexdigest()}"