# Coverage for src/gitversioned/utils/git.py: 0%
# 184 statements
# coverage.py v7.14.0, created at 2026-05-14 20:57 +0000
"""
Git repository utility module.

This module provides a robust interface for interacting with Git repositories.
It uses a combination of subprocess calls and Pydantic models to safely and
efficiently retrieve and represent Git metadata such as commits, tags, and branches.

.. code-block:: python

    from gitversioned.utils.git import GitRepository

    repo = GitRepository()
    if repo.is_available:
        print(repo.current_branch.branch_name)
"""
from __future__ import annotations

import shlex
import subprocess
import sys
from collections.abc import Iterator
from datetime import datetime
from pathlib import Path
from typing import Any

from pydantic import BaseModel, Field, model_validator

from gitversioned.logging import logger

# Names exported by ``from gitversioned.utils.git import *``.
__all__ = [
    "GitReference",
    "GitRepository",
    "NotAGitRepositoryError",
]
class NotAGitRepositoryError(Exception):
    """
    Raised when the directory is not a valid Git repository.

    This exception is raised when Git operations are attempted on a directory that
    is not part of a valid Git work tree.
    """
class GitReference(BaseModel):
    """
    Pydantic model representing a Git reference (commit, tag, or branch).

    Provides the foundational metadata fields for Git objects and includes
    optional fields for specific types like author information for commits,
    or names for tags and branches.

    .. code-block:: python

        def print_metadata(metadata: GitReference):
            print(f"SHA: {metadata.short_sha}, HEAD: {metadata.is_head_commit}")
    """

    commit_sha: str = Field(
        description="The full, un-abbreviated SHA hash of the commit.",
        default="",
    )
    short_sha: str = Field(
        description="The abbreviated SHA hash of the commit for display purposes.",
        default="",
    )
    timestamp: datetime = Field(
        description="The creation or commit timestamp of the Git object.",
        default=datetime.min,
    )
    distance_from_head: int = Field(
        description="The number of commits between this object and the current HEAD.",
        default=sys.maxsize,
    )
    is_head_commit: bool = Field(
        description="Indicates whether this object represents the current HEAD commit.",
        default=False,
    )
    total_commits: int = Field(
        description="Total number of commits in the repository.",
        default=0,
    )
    author_name: str = Field(
        description="The name of the author who created the commit.", default=""
    )
    author_email: str = Field(
        description="The email address of the author who created the commit.",
        default="",
    )
    commit_message: str = Field(
        description="The full message associated with the commit.", default=""
    )
    tag_name: str = Field(description="The name of the Git tag.", default="")
    branch_name: str = Field(description="The name of the Git branch.", default="")
    is_current_branch: bool = Field(
        description="Indicates whether this branch is currently checked out.",
        default=False,
    )

    def __str__(self) -> str:
        """
        Return a one-line summary of the reference.

        The layout depends on which fields are populated, checked in this order:
        tag name, branch name, commit message; otherwise only the short SHA and
        timestamp are shown.
        """
        if self.tag_name:
            return f"{self.tag_name} -> {self.short_sha} ({self.timestamp.isoformat()})"
        if self.branch_name:
            marker = "*" if self.is_current_branch else " "
            return (
                f"{marker} {self.branch_name} -> {self.short_sha} "
                f"({self.timestamp.isoformat()})"
            )
        if self.commit_message:
            return (
                f"{self.short_sha} {self.commit_message} - {self.author_name} "
                f"({self.timestamp.isoformat()})"
            )
        return f"{self.short_sha} ({self.timestamp.isoformat()})"

    @model_validator(mode="before")
    @classmethod
    def parse_git_references(cls, data: Any) -> Any:
        """
        Extracts branch and tag metadata from the 'refs' input string.

        Logic identifies the current branch via 'HEAD ->' and extracts
        the first listed tag. The ``ref_names`` key is accepted as an alias
        for ``refs`` and takes precedence when both are present.

        :param data: The raw input handed to the model before field validation.
        :return: ``data`` itself when it is not a dict; otherwise a shallow copy
            enriched with ``branch_name``, ``is_current_branch`` and ``tag_name``
            where they could be derived from the decoration string.
        """
        if not isinstance(data, dict):
            return data

        # Work on a shallow copy so the caller's dict is never mutated.
        values = dict(data)

        if "ref_names" in values:
            values["refs"] = values["ref_names"]

        reference_string = values.get("refs")
        if not isinstance(reference_string, str):
            # Missing or non-text decoration: nothing to parse.
            return values

        found_tags: list[str] = []

        for raw_part in reference_string.split(","):
            part = raw_part.strip()
            if not part:
                continue

            # Detect current branch from 'HEAD -> branch_name'
            if "HEAD ->" in part:
                values["branch_name"] = part.replace("HEAD ->", "").strip()
                values["is_current_branch"] = True

            # Detect tags
            elif "tag:" in part:
                found_tags.append(part.replace("tag:", "").strip())

            # Fallback for plain branch names if HEAD was not explicitly indicated.
            # A bare 'HEAD' (detached checkout) is a pointer, not a branch name.
            elif part != "HEAD" and not values.get("branch_name"):
                values["branch_name"] = part

        # The first tag in the ref list is considered the closest/most recent
        if found_tags and not values.get("tag_name"):
            values["tag_name"] = found_tags[0]

        return values
class GitRepository:
    """
    Refined interface for Git operations using Pydantic and safe execution.

    Provides properties and methods to query a Git repository's status, branches, tags,
    and commits. It encapsulates subprocess calls to Git and returns typed
    Pydantic models.

    Nothing is cached: every property access runs ``git`` again in ``base_path``.

    .. code-block:: python

        repo = GitRepository()
        if repo.is_available:
            print(repo.last_tag.tag_name if repo.last_tag else "No tags found")
    """

    def __init__(
        self,
        repository_path: Path | str | None = None,
    ) -> None:
        """
        Initializes the GitRepository instance.

        :param repository_path: The base path to the Git repository.
            Defaults to the current working directory.
        """
        self.base_path = Path(repository_path or Path.cwd()).resolve()

    def __str__(self) -> str:
        """Return a concise string representation."""
        if not self.is_available:
            return f"GitRepository({self.base_path}) - Unavailable"

        # Query the working-tree state once; each access spawns a git process.
        dirty_files = self.dirty_files
        is_dirty = bool(dirty_files)
        status = "*" if is_dirty else ""
        head = self.head_name or "detached"

        current = self.current_commit
        tag = self.last_tag
        branch = self.current_branch

        return (
            f"GitRepository(path={self.base_path!r}, is_available={self.is_available}, "
            f"commit_count={self.commit_count}, is_dirty={is_dirty}, "
            f"dirty_files={dirty_files}, "
            f"current_commit={current.short_sha if current else None}, "
            f"last_tag={tag.tag_name if tag else None}, "
            f"current_branch={branch.branch_name if branch else None}"
            f") - {head}{status}"
        )

    def __repr__(self) -> str:
        """Return a detailed string representation."""
        return f"GitRepository(base_path={self.base_path!r})"

    @property
    def is_available(self) -> bool:
        """
        Checks if the path is inside a valid git work tree.

        :return: True if the base path is a valid Git repository work tree,
            False otherwise.
        """
        return self._execute_command(["rev-parse", "--is-inside-work-tree"]) == "true"

    @property
    def root_directory(self) -> Path:
        """
        Gets the root directory of the Git repository.

        :return: The absolute path to the root directory of the Git repository.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        self._ensure_valid_repository()
        return Path(self._execute_command(["rev-parse", "--show-toplevel"]))

    @property
    def repository_name(self) -> str:
        """
        Gets the name of the Git repository.

        Extracts the repository name from the remote origin URL if available; otherwise,
        falls back to the name of the root directory.

        :return: The string name of the Git repository.
        :raises NotAGitRepositoryError: If no usable origin URL is configured and
            the repository is not valid.
        """
        if remote_url := self.remote_origin_url:
            # Tolerate trailing slashes and Windows-style local paths.
            normalized = remote_url.replace("\\", "/").rstrip("/")
            last_segment = normalized.rsplit("/", 1)[-1]
            # scp-like URLs without a slash ("git@host:repo.git") keep the
            # host prefix in the last segment; drop everything up to the colon.
            name = last_segment.rpartition(":")[2]
            if name.endswith(".git"):
                name = name[:-4]
            if name:
                return name
        return self.root_directory.name

    @property
    def remote_origin_url(self) -> str:
        """
        Gets the remote origin URL.

        :return: The URL of the remote origin, or an empty string if not configured.
        """
        return self._execute_command(["config", "--get", "remote.origin.url"])

    @property
    def commit_count(self) -> int:
        """
        Gets the total number of commits reachable from HEAD.

        :return: The total number of commits, or 0 when the repository is
            unavailable or the count cannot be determined.
        """
        if not self.is_available:
            return 0
        try:
            return int(self._execute_command(["rev-list", "--count", "HEAD"]) or 0)
        except ValueError:
            return 0

    @property
    def is_dirty(self) -> bool:
        """
        Checks if the repository has uncommitted changes.

        :return: True if there are uncommitted changes, False otherwise.
        """
        return bool(self.dirty_files)

    @property
    def dirty_files(self) -> list[str]:
        """
        Gets a list of modified files.

        :return: A list of file paths that have uncommitted changes.
        """
        # Porcelain lines are "XY <path>" where X may be a space. The output must
        # not be stripped, otherwise the first line loses a status column and the
        # fixed-width slice below cuts into the path.
        output = self._execute_command(["status", "--porcelain"], strip_output=False)
        return [line[3:] for line in output.splitlines() if line]

    @property
    def current_commit(self) -> GitReference | None:
        """
        Gets the most recent commit.

        :return: The most recent GitReference object, or None if no commits exist.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        return next(self.commits, None)

    @property
    def last_tag(self) -> GitReference | None:
        """
        Gets the most recent tag.

        :return: The most recent GitReference object, or None if no tags exist.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        return next(self.tags, None)

    @property
    def current_branch(self) -> GitReference | None:
        """
        Gets the currently checked-out branch.

        :return: The GitReference object representing the current branch,
            or None if detached.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        return next(
            (branch for branch in self.branches if branch.is_current_branch),
            None,
        )

    @property
    def head_name(self) -> str:
        """
        Gets the branch name or short sha of HEAD.

        :return: The current branch name, or the short SHA if in a detached HEAD state.
            An empty string is returned when neither is available.
        :raises NotAGitRepositoryError: If the repository is not valid.
        """
        if branch := self.current_branch:
            return branch.branch_name
        if current := self.current_commit:
            return current.short_sha
        return ""

    @property
    def commits(self) -> Iterator[GitReference]:
        """
        Yields the commits listed by ``git log``, starting at HEAD.

        :return: An iterator of GitReference objects.
        :raises NotAGitRepositoryError: If the repository is not valid. Because this
            is a generator, the error surfaces when iteration starts.
        """
        self._ensure_valid_repository()
        total_commits = self.commit_count
        format_string = "%H|%h|%cI|%an|%ae|%s|%D"
        lines = self._stream_command(["log", f"--format={format_string}"])

        for index, line in enumerate(lines):
            # Peel the decoration (%D) off the right first, then split the rest at
            # most five times so a '|' inside the subject stays in the subject.
            leading, separator, decoration = line.rpartition("|")
            fields = leading.split("|", 5)
            if not separator or len(fields) != 6:  # noqa: PLR2004
                continue
            commit_sha, short_sha, date_str, author_name, author_email, subject = fields

            tag_name = ""
            branch_name = ""
            is_current_branch = False

            refs = decoration.split(", ") if decoration else []
            for ref in refs:
                if ref.startswith("tag: "):
                    tag_name = ref[5:]
                elif "->" in ref:
                    # "HEAD -> main": the checked-out branch.
                    branch_name = ref.partition(" -> ")[2]
                    is_current_branch = True
                elif (
                    ref
                    and not ref.startswith("origin/")
                    and ref != "HEAD"
                    and not branch_name
                ):
                    branch_name = ref

            yield GitReference(
                commit_sha=commit_sha,
                short_sha=short_sha,
                timestamp=datetime.fromisoformat(date_str.replace("Z", "+00:00")),
                author_name=author_name,
                author_email=author_email,
                commit_message=subject,
                tag_name=tag_name,
                branch_name=branch_name,
                is_current_branch=is_current_branch,
                distance_from_head=index,
                is_head_commit=(index == 0),
                total_commits=total_commits,
            )

    @property
    def tags(self) -> Iterator[GitReference]:
        """
        Yields all tags in the repository, newest first by creation date.

        :return: An iterator of GitReference objects.
        :raises NotAGitRepositoryError: If the repository is not valid. Because this
            is a generator, the error surfaces when iteration starts.
        """
        self._ensure_valid_repository()
        current = self.current_commit
        head_sha = current.commit_sha if current else ""
        total_commits = self.commit_count
        # For annotated tags %(objectname) is the tag object, not the commit, so
        # prefer the dereferenced %(*objectname) whenever git reports one.
        format_string = (
            "%(refname:short)|%(creatordate:iso-strict)|"
            "%(if)%(*objectname)%(then)%(*objectname)%(else)%(objectname)%(end)"
        )

        lines = self._stream_command(
            [
                "for-each-ref",
                "--sort=-creatordate",
                f"--format={format_string}",
                "refs/tags/",
            ]
        )

        for line in lines:
            # Split from the right so a '|' inside the tag name is preserved.
            fields = line.rsplit("|", 2)
            if len(fields) != 3:  # noqa: PLR2004
                continue
            name, date_str, sha = fields
            distance_str = self._execute_command(
                ["rev-list", "--count", f"{sha}..HEAD"]
            )
            yield GitReference(
                tag_name=name,
                commit_sha=sha,
                short_sha=sha[:7],
                timestamp=datetime.fromisoformat(date_str.replace("Z", "+00:00")),
                distance_from_head=int(distance_str or 0),
                is_head_commit=(sha == head_sha),
                total_commits=total_commits,
            )

    @property
    def branches(self) -> Iterator[GitReference]:
        """
        Yields all local and remote-tracking branches in the repository.

        ``distance_from_head`` is always reported as 0 for branches.

        :return: An iterator of GitReference objects.
        :raises NotAGitRepositoryError: If the repository is not valid. Because this
            is a generator, the error surfaces when iteration starts.
        """
        self._ensure_valid_repository()
        current = self.current_commit
        head_sha = current.commit_sha if current else ""
        total_commits = self.commit_count
        format_string = (
            "%(refname:short)|%(objectname)|%(HEAD)|%(committerdate:iso-strict)"
        )

        lines = self._stream_command(
            [
                "for-each-ref",
                f"--format={format_string}",
                "refs/heads/",
                "refs/remotes/",
            ]
        )

        for line in lines:
            # Split from the right so a '|' inside the branch name is preserved.
            fields = line.rsplit("|", 3)
            if len(fields) != 4:  # noqa: PLR2004
                continue
            name, sha, current_marker, date_str = fields
            yield GitReference(
                branch_name=name,
                commit_sha=sha,
                short_sha=sha[:7],
                timestamp=datetime.fromisoformat(date_str.replace("Z", "+00:00")),
                distance_from_head=0,
                is_head_commit=(sha == head_sha),
                is_current_branch=(current_marker == "*"),
                total_commits=total_commits,
            )

    def _stream_command(self, arguments: list[str]) -> Iterator[str]:
        """
        Executes a git command and streams output line by line.

        Lines are stripped of surrounding whitespace and blank lines are skipped.
        Failures to start the process are logged at debug level and end the stream.

        :param arguments: The git arguments, without the leading ``git``.
        :return: An iterator over the non-empty output lines.
        """
        full_command = ["git", *arguments]
        try:
            with subprocess.Popen(  # noqa: S603
                full_command,
                cwd=self.base_path,
                stdout=subprocess.PIPE,
                # Keep git diagnostics off the caller's terminal, matching
                # _execute_command, which also never surfaces stderr.
                stderr=subprocess.DEVNULL,
                text=True,
            ) as process:
                if process.stdout:
                    for line in process.stdout:
                        if clean_line := line.strip():
                            yield clean_line
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as error:
            logger.debug(f"Command '{shlex.join(full_command)}' failed: {error}")

    def _execute_command(
        self,
        arguments: list[str],
        *,
        strip_output: bool = True,
    ) -> str:
        """
        Executes a git command with standardized error handling.

        :param arguments: The git arguments, without the leading ``git``.
        :param strip_output: When True (the default) surrounding whitespace is
            removed from the captured output. Pass False for column-sensitive
            output such as ``status --porcelain``.
        :return: The captured standard output, or an empty string if the command
            could not be run or exited with a non-zero status.
        """
        full_command = ["git", *arguments]
        try:
            output = subprocess.run(  # noqa: S603
                full_command,
                cwd=self.base_path,
                capture_output=True,
                text=True,
                check=True,
            ).stdout
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as error:
            logger.debug(f"Command '{shlex.join(full_command)}' failed: {error}")
            return ""
        return output.strip() if strip_output else output

    def _ensure_valid_repository(self) -> None:
        """
        Raises an error if the directory is not a Git repository.

        :raises NotAGitRepositoryError: If ``base_path`` is not inside a work tree.
        """
        if not self.is_available:
            raise NotAGitRepositoryError(
                f"Path '{self.base_path}' is not a Git repository."
            )