Coverage for src / gitversioned / utils / git.py: 83%

184 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-14 20:55 +0000

1""" 

2Git repository utility module. 

3 

4This module provides a robust interface for interacting with Git repositories. 

5It uses a combination of subprocess calls and Pydantic models to safely and 

6efficiently retrieve and represent Git metadata such as commits, tags, and branches. 

7 

8.. code-block:: python 

9 

10 from gitversioned.utils.git import GitRepository 

11 

12 repo = GitRepository() 

13 if repo.is_available: 

14 print(repo.current_branch.branch_name) 

15""" 

16 

17from __future__ import annotations 

18 

19import shlex 

20import subprocess 

21import sys 

22from collections.abc import Iterator 

23from datetime import datetime 

24from pathlib import Path 

25from typing import Any 

26 

27from pydantic import BaseModel, Field, model_validator 

28 

29from gitversioned.logging import logger 

30 

# Names re-exported by ``from gitversioned.utils.git import *``.
__all__ = ["GitReference", "GitRepository", "NotAGitRepositoryError"]

36 

37 

class NotAGitRepositoryError(Exception):
    """
    Signals that a path does not belong to a Git work tree.

    Raised by repository operations that require a valid Git work tree when
    the configured directory is not inside one.
    """

45 

46 

class GitReference(BaseModel):
    """
    Pydantic model representing a Git reference (commit, tag, or branch).

    Provides the foundational metadata fields for Git objects and includes
    optional fields for specific types like author information for commits,
    or names for tags and branches.

    Besides the declared fields, the model accepts a ``refs`` (or
    ``ref_names``) string holding a comma-separated decoration list such as
    ``"HEAD -> main, tag: v1.0"``; see :meth:`parse_git_references`.

    .. code-block:: python

        def print_metadata(metadata: GitReference):
            print(f"SHA: {metadata.short_sha}, HEAD: {metadata.is_head_commit}")
    """

    commit_sha: str = Field(
        description="The full, un-abbreviated SHA hash of the commit.",
        default="",
    )
    short_sha: str = Field(
        description="The abbreviated SHA hash of the commit for display purposes.",
        default="",
    )
    timestamp: datetime = Field(
        description="The creation or commit timestamp of the Git object.",
        default=datetime.min,
    )
    distance_from_head: int = Field(
        description="The number of commits between this object and the current HEAD.",
        default=sys.maxsize,
    )
    is_head_commit: bool = Field(
        description="Indicates whether this object represents the current HEAD commit.",
        default=False,
    )
    total_commits: int = Field(
        description="Total number of commits in the repository.",
        default=0,
    )
    author_name: str = Field(
        description="The name of the author who created the commit.", default=""
    )
    author_email: str = Field(
        description="The email address of the author who created the commit.",
        default="",
    )
    commit_message: str = Field(
        description="The full message associated with the commit.", default=""
    )
    tag_name: str = Field(description="The name of the Git tag.", default="")
    branch_name: str = Field(description="The name of the Git branch.", default="")
    is_current_branch: bool = Field(
        description="Indicates whether this branch is currently checked out.",
        default=False,
    )

    def __str__(self) -> str:
        """
        Return a one-line summary whose shape depends on the populated fields.

        Precedence is tag, then branch, then commit message; a reference with
        none of these is rendered as its short SHA and timestamp only.
        """
        if self.tag_name:
            return f"{self.tag_name} -> {self.short_sha} ({self.timestamp.isoformat()})"
        if self.branch_name:
            marker = "*" if self.is_current_branch else " "
            return (
                f"{marker} {self.branch_name} -> {self.short_sha} "
                f"({self.timestamp.isoformat()})"
            )
        if self.commit_message:
            return (
                f"{self.short_sha} {self.commit_message} - {self.author_name} "
                f"({self.timestamp.isoformat()})"
            )
        return f"{self.short_sha} ({self.timestamp.isoformat()})"

    @model_validator(mode="before")
    @classmethod
    def parse_git_references(cls, data: Any) -> Any:
        """
        Extracts branch and tag metadata from the 'refs' input string.

        The string is split on commas. An entry containing ``HEAD ->`` names
        the checked-out branch, entries containing ``tag:`` are tags (the
        first one is used unless ``tag_name`` was supplied), and the first
        remaining entry is used as the branch name when none is known yet.
        A bare ``HEAD`` entry (detached HEAD) is never treated as a branch.

        :param data: The raw input handed to the model. Anything that is not
            a dict, or a dict without ``refs``/``ref_names``, is returned
            unchanged.
        :return: The input, or a shallow copy of it enriched with
            ``branch_name``, ``is_current_branch`` and ``tag_name``.
        """
        if not isinstance(data, dict):
            return data

        if "ref_names" not in data and "refs" not in data:
            return data

        # Work on a copy so the caller's dict is never modified in place.
        data = dict(data)

        # 'ref_names' is an alias and takes precedence over 'refs'.
        if "ref_names" in data:
            data["refs"] = data["ref_names"]

        reference_string = data["refs"]
        if not isinstance(reference_string, str):
            # Nothing parseable (e.g. None); leave it to field validation.
            return data

        found_tags = []
        for part in (piece.strip() for piece in reference_string.split(",")):
            # Detect current branch from 'HEAD -> branch_name'
            if "HEAD ->" in part:
                data["branch_name"] = part.replace("HEAD ->", "").strip()
                data["is_current_branch"] = True

            # Detect tags
            elif "tag:" in part:
                found_tags.append(part.replace("tag:", "").strip())

            # Fallback for plain branch names if HEAD was not explicitly
            # indicated. A bare 'HEAD' only marks a detached checkout.
            elif part != "HEAD" and not data.get("branch_name"):
                data["branch_name"] = part

        # The first tag in the ref list is considered the closest/most recent
        if found_tags and not data.get("tag_name"):
            data["tag_name"] = found_tags[0]

        return data

160 

161 

class GitRepository:
    """
    Refined interface for Git operations using Pydantic and safe execution.

    Provides properties and methods to query a Git repository's status, branches, tags,
    and commits. It encapsulates subprocess calls to Git and returns typed
    Pydantic models. Every property runs ``git`` again on each access; nothing
    is cached on the instance.

    .. code-block:: python

        repo = GitRepository()
        if repo.is_available:
            print(repo.last_tag.tag_name if repo.last_tag else "No tags found")
    """

    # NUL cannot appear in any value git prints for a commit, and unlike the
    # ASCII separator control characters it is not removed by str.strip().
    _FIELD_SEPARATOR = "\x00"
    # Placeholders requested from ``git log``, in the order they are unpacked.
    _COMMIT_FIELDS = ("%H", "%h", "%cI", "%an", "%ae", "%s", "%D")
    # Number of '|'-separated fields in the ``for-each-ref`` formats below.
    _REF_FIELD_COUNT = 4

    def __init__(
        self,
        repository_path: Path | str | None = None,
    ) -> None:
        """
        Initializes the GitRepository instance.

        :param repository_path: The base path to the Git repository.
            Defaults to the current working directory.
        """
        self.base_path = Path(repository_path or Path.cwd()).resolve()

    def __str__(self) -> str:
        """Return a concise string representation."""
        if not self.is_available:
            return f"GitRepository({self.base_path}) - Unavailable"

        # Query each piece of state once; every property access spawns git.
        dirty_files = self.dirty_files
        is_dirty = bool(dirty_files)
        current = self.current_commit
        tag = self.last_tag
        branch = self.current_branch

        # Same precedence as ``head_name``: branch, then short SHA.
        if branch:
            head = branch.branch_name
        elif current:
            head = current.short_sha
        else:
            head = ""
        head = head or "detached"
        status = "*" if is_dirty else ""

        return (
            f"GitRepository(path={self.base_path!r}, is_available={True}, "
            f"commit_count={self.commit_count}, is_dirty={is_dirty}, "
            f"dirty_files={dirty_files}, "
            f"current_commit={current.short_sha if current else None}, "
            f"last_tag={tag.tag_name if tag else None}, "
            f"current_branch={branch.branch_name if branch else None}"
            f") - {head}{status}"
        )

    def __repr__(self) -> str:
        """Return a detailed string representation."""
        return f"GitRepository(base_path={self.base_path!r})"

    @property
    def is_available(self) -> bool:
        """
        Checks if the path is inside a valid git work tree.

        :return: True if the base path is a valid Git repository work tree,
            False otherwise.
        """
        return self._execute_command(["rev-parse", "--is-inside-work-tree"]) == "true"

    @property
    def root_directory(self) -> Path:
        """
        Gets the root directory of the Git repository.

        :return: The absolute path to the root directory of the Git repository.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        self._ensure_valid_repository()
        return Path(self._execute_command(["rev-parse", "--show-toplevel"]))

    @property
    def repository_name(self) -> str:
        """
        Gets the name of the Git repository.

        Extracts the repository name from the remote origin URL if available; otherwise,
        falls back to the name of the root directory.

        :return: The string name of the Git repository.
        :raises NotAGitRepositoryError: If no name can be derived from the
            remote URL and the base path is not a valid repository.
        """
        if name := self._name_from_remote_url(self.remote_origin_url):
            return name
        return self.root_directory.name

    @staticmethod
    def _name_from_remote_url(remote_url: str) -> str:
        """
        Derive a repository name from a remote URL.

        Handles both ``scheme://host/path/repo.git`` and scp-style
        ``user@host:repo.git`` forms, ignores trailing slashes and drops a
        ``.git`` suffix.

        :param remote_url: The configured remote URL; may be empty.
        :return: The last path component without ``.git``, or an empty string
            when nothing usable remains.
        """
        # Treat the scp-style ':' like a path separator so 'host:repo.git'
        # yields 'repo.git' rather than the whole string.
        normalized = remote_url.strip().rstrip("/").replace(":", "/")
        tail = normalized.rsplit("/", 1)[-1]
        return tail[:-4] if tail.endswith(".git") else tail

    @property
    def remote_origin_url(self) -> str:
        """
        Gets the remote origin URL.

        :return: The URL of the remote origin, or an empty string if not configured.
        """
        return self._execute_command(["config", "--get", "remote.origin.url"])

    @property
    def commit_count(self) -> int:
        """
        Gets the number of commits reachable from HEAD.

        :return: The commit count, or 0 when the path is not a repository or
            the count cannot be determined.
        """
        if not self.is_available:
            return 0
        try:
            return int(self._execute_command(["rev-list", "--count", "HEAD"]) or 0)
        except ValueError:
            return 0

    @property
    def is_dirty(self) -> bool:
        """
        Checks if the repository has uncommitted changes.

        :return: True if there are uncommitted changes, False otherwise.
        """
        return bool(self.dirty_files)

    @property
    def dirty_files(self) -> list[str]:
        """
        Gets a list of modified files.

        :return: A list of file paths that have uncommitted changes. Empty
            when the working tree is clean or the command fails.
        """
        # The porcelain format is column based ('XY path') and X may be a
        # space, so the output must not be stripped before it is parsed.
        output = self._execute_command(["status", "--porcelain"], strip=False)
        return self._parse_porcelain_status(output)

    @staticmethod
    def _parse_porcelain_status(output: str) -> list[str]:
        """
        Extract the path column from ``git status --porcelain`` output.

        :param output: Raw, unstripped command output.
        :return: The text after the two status columns and the separating
            space of every line that has a path.
        """
        return [line[3:] for line in output.splitlines() if len(line) > 3]  # noqa: PLR2004

    @property
    def current_commit(self) -> GitReference | None:
        """
        Gets the most recent commit.

        :return: The most recent GitReference object, or None if no commits exist.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        return next(self.commits, None)

    @property
    def last_tag(self) -> GitReference | None:
        """
        Gets the most recent tag.

        :return: The most recent GitReference object, or None if no tags exist.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        return next(self.tags, None)

    @property
    def current_branch(self) -> GitReference | None:
        """
        Gets the currently checked-out branch.

        :return: The GitReference object representing the current branch,
            or None if detached.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        return next(
            (branch for branch in self.branches if branch.is_current_branch),
            None,
        )

    @property
    def head_name(self) -> str:
        """
        Gets the branch name or short sha of HEAD.

        :return: The current branch name, or the short SHA if in a detached HEAD state.
            An empty string when neither is available.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        if branch := self.current_branch:
            return branch.branch_name
        if current := self.current_commit:
            return current.short_sha
        return ""

    @property
    def commits(self) -> Iterator[GitReference]:
        """
        Yields the commits reachable from HEAD, newest first.

        This is a generator: validation and the ``git log`` call happen when
        the first item is requested, not when the property is read.

        :return: An iterator of GitReference objects.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        self._ensure_valid_repository()
        total_commits = self.commit_count
        # '%x00' makes git emit a NUL between fields, so subjects or author
        # names containing '|' (or any printable character) parse correctly.
        format_string = "%x00".join(self._COMMIT_FIELDS)
        lines = self._stream_command(["log", f"--format={format_string}"])

        for index, line in enumerate(lines):
            fields = line.split(self._FIELD_SEPARATOR)
            if len(fields) != len(self._COMMIT_FIELDS):
                # Unexpected output; skip it but keep counting positions.
                continue

            sha, short_sha, date_str, author, email, subject, decorations = fields
            tag_name, branch_name, is_current_branch = self._parse_decorations(
                decorations
            )

            yield GitReference(
                commit_sha=sha,
                short_sha=short_sha,
                timestamp=datetime.fromisoformat(date_str.replace("Z", "+00:00")),
                author_name=author,
                author_email=email,
                commit_message=subject,
                tag_name=tag_name,
                branch_name=branch_name,
                is_current_branch=is_current_branch,
                distance_from_head=index,
                is_head_commit=(index == 0),
                total_commits=total_commits,
            )

    @staticmethod
    def _parse_decorations(decorations: str) -> tuple[str, str, bool]:
        """
        Interpret the ``%D`` decoration list of a single commit.

        :param decorations: Comma-separated ref names, e.g.
            ``"HEAD -> main, tag: v1.0, origin/main"``; may be empty.
        :return: ``(tag_name, branch_name, is_current_branch)``. When several
            tags are listed the last one is returned. ``origin/`` refs and a
            bare ``HEAD`` are never used as the branch name.
        """
        tag_name = ""
        branch_name = ""
        is_current_branch = False

        for ref in decorations.split(", ") if decorations else []:
            if ref.startswith("tag: "):
                tag_name = ref[5:]
            elif " -> " in ref:
                # 'HEAD -> branch' marks the checked-out branch.
                branch_name = ref.split(" -> ", 1)[1]
                is_current_branch = True
            elif (
                ref
                and not ref.startswith("origin/")
                and ref != "HEAD"
                and not branch_name
            ):
                branch_name = ref

        return tag_name, branch_name, is_current_branch

    @property
    def tags(self) -> Iterator[GitReference]:
        """
        Yields all tags in the repository, most recently created first.

        ``commit_sha`` is the commit the tag points at; for annotated tags the
        tag object is dereferenced. One ``git rev-list`` call is made per tag
        to compute its distance from HEAD.

        :return: An iterator of GitReference objects.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        self._ensure_valid_repository()
        current = self.current_commit
        head_sha = current.commit_sha if current else ""
        total_commits = self.commit_count
        # '%(*objectname)' is the dereferenced commit of an annotated tag and
        # is empty for lightweight tags.
        format_string = (
            "%(refname:short)|%(creatordate:iso-strict)|%(objectname)|%(*objectname)"
        )

        lines = self._stream_command(
            [
                "for-each-ref",
                "--sort=-creatordate",
                f"--format={format_string}",
                "refs/tags/",
            ]
        )

        for line in lines:
            parsed = self._split_tag_line(line)
            if parsed is None:
                continue
            name, date_str, sha = parsed
            distance_str = self._execute_command(
                ["rev-list", "--count", f"{sha}..HEAD"]
            )
            yield GitReference(
                tag_name=name,
                commit_sha=sha,
                short_sha=sha[:7],
                timestamp=datetime.fromisoformat(date_str.replace("Z", "+00:00")),
                distance_from_head=int(distance_str or 0),
                is_head_commit=(sha == head_sha),
                total_commits=total_commits,
            )

    @staticmethod
    def _split_tag_line(line: str) -> tuple[str, str, str] | None:
        """
        Split one ``name|date|objectname|*objectname`` line from ``tags``.

        :param line: A single output line of ``git for-each-ref``.
        :return: ``(tag_name, date_string, commit_sha)`` or None when the line
            does not have the expected number of fields.
        """
        # Split from the right: only the tag name may itself contain '|'.
        fields = line.rsplit("|", 3)
        if len(fields) != 4:  # noqa: PLR2004
            return None
        name, date_str, object_sha, peeled_sha = fields
        return name, date_str, peeled_sha or object_sha

    @property
    def branches(self) -> Iterator[GitReference]:
        """
        Yields all local and remote-tracking branches in the repository.

        ``distance_from_head`` is always reported as 0 for branches.

        :return: An iterator of GitReference objects.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        self._ensure_valid_repository()
        current = self.current_commit
        head_sha = current.commit_sha if current else ""
        total_commits = self.commit_count
        format_string = (
            "%(refname:short)|%(objectname)|%(HEAD)|%(committerdate:iso-strict)"
        )

        lines = self._stream_command(
            [
                "for-each-ref",
                f"--format={format_string}",
                "refs/heads/",
                "refs/remotes/",
            ]
        )

        for line in lines:
            # Split from the right: only the branch name may contain '|'.
            fields = line.rsplit("|", self._REF_FIELD_COUNT - 1)
            if len(fields) != self._REF_FIELD_COUNT:
                continue
            name, sha, current_marker, date_str = fields
            yield GitReference(
                branch_name=name,
                commit_sha=sha,
                short_sha=sha[:7],
                timestamp=datetime.fromisoformat(date_str.replace("Z", "+00:00")),
                distance_from_head=0,
                is_head_commit=(sha == head_sha),
                is_current_branch=(current_marker == "*"),
                total_commits=total_commits,
            )

    def _stream_command(self, arguments: list[str]) -> Iterator[str]:
        """
        Executes a git command and streams output line by line.

        Lines are stripped of surrounding whitespace and blank lines are
        dropped. Git's exit status is not checked and its error output is
        discarded, so a failing command simply yields nothing.

        :param arguments: The arguments to pass after ``git``.
        :return: An iterator over the non-empty output lines.
        """
        full_command = ["git", *arguments]
        try:
            with subprocess.Popen(  # noqa: S603
                full_command,
                cwd=self.base_path,
                stdout=subprocess.PIPE,
                # Keep git's diagnostics off the caller's terminal, matching
                # _execute_command which captures them.
                stderr=subprocess.DEVNULL,
                text=True,
            ) as process:
                if process.stdout:
                    for line in process.stdout:
                        if clean_line := line.strip():
                            yield clean_line
        except OSError as error:
            # Covers a missing git executable or an unusable working directory.
            logger.debug(f"Command '{shlex.join(full_command)}' failed: {error}")

    def _execute_command(self, arguments: list[str], *, strip: bool = True) -> str:
        """
        Executes a git command with standardized error handling.

        :param arguments: The arguments to pass after ``git``.
        :param strip: When True (the default) surrounding whitespace is
            removed from the output. Pass False for column-sensitive output.
        :return: The command's standard output, or an empty string if git
            exits with a non-zero status or cannot be started.
        """
        full_command = ["git", *arguments]
        try:
            completed = subprocess.run(  # noqa: S603
                full_command,
                cwd=self.base_path,
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as error:
            logger.debug(f"Command '{shlex.join(full_command)}' failed: {error}")
            return ""
        return completed.stdout.strip() if strip else completed.stdout

    def _ensure_valid_repository(self) -> None:
        """
        Raises an error if the directory is not a Git repository.

        :raises NotAGitRepositoryError: If ``is_available`` is False.
        """
        if not self.is_available:
            raise NotAGitRepositoryError(
                f"Path '{self.base_path}' is not a Git repository."
            )