diff --git a/git_analytics/sources/__init__.py b/git_analytics/sources/__init__.py index bf59a31..0aa904a 100644 --- a/git_analytics/sources/__init__.py +++ b/git_analytics/sources/__init__.py @@ -1,5 +1,7 @@ from .git_commit_adapter import GitCommitSource +from .git_log_adapter import GitLogSource __all__ = [ "GitCommitSource", + "GitLogSource", ] diff --git a/git_analytics/sources/git_log_adapter.py b/git_analytics/sources/git_log_adapter.py index b5e99a0..a30d933 100644 --- a/git_analytics/sources/git_log_adapter.py +++ b/git_analytics/sources/git_log_adapter.py @@ -5,118 +5,118 @@ from typing import Iterable, Iterator from git_analytics.entities import AnalyticsCommit +from git_analytics.interfaces import CommitSource -# headers +# commit header lines _RE_COMMIT = re.compile(r"^commit\s+(?P[0-9a-f]{7,40})\s*$", re.I) -_RE_AUTHOR = re.compile(r"^Author:\s*(?P.+?)\s*<(?P[^>]+)>\s*$", re.I) +_RE_AUTHOR = re.compile( + r"^Author:\s*(?P[^<]+?)(?:\s*<(?P[^>]+)>)?\s*$", + re.I, +) _RE_DATE = re.compile(r"^Date:\s*(?P
.+?)\s*$", re.I) -# numstat lines: "\t\t" +# numstat: "\t\t" _RE_NUMSTAT = re.compile(r"^\s*(?P-|\d+)\s+(?P-|\d+)\s+(?P.+)$") - - -def _parse_dt_iso(s: str) -> datetime: - # Example: 2025-08-16T16:35:39+02:00 — works fine with fromisoformat - return datetime.fromisoformat(s.strip()) - - -def _yield_commits(lines: Iterable[str]) -> Iterator[AnalyticsCommit]: - sha: str | None = None - author: str | None = None - dt: datetime | None = None - - # collect only the first non-empty message line (subject) - subject: str | None = None - in_message_block = False - headers_done = False - - ins_total = 0 - del_total = 0 - files_changed = 0 - - def flush(): - nonlocal sha, author, dt, subject, in_message_block, headers_done - nonlocal ins_total, del_total, files_changed - if not sha: - return - yield AnalyticsCommit( - sha=sha, - commit_author=author or "Unknown", - committed_datetime=dt or _parse_dt_iso("1970-01-01T00:00:00+00:00"), - lines_insertions=ins_total, - lines_deletions=del_total, - files_changed=files_changed, - message=subject or "", - ) - # reset state for the next commit - sha = author = subject = None - dt = None - in_message_block = False - headers_done = False - ins_total = del_total = files_changed = 0 - - for raw in lines: - line = raw.rstrip("\n") - - # start of a new commit - m = _RE_COMMIT.match(line) - if m: - # flush the previous commit block - yield from flush() - sha = m.group("sha") - continue - - if sha and not headers_done: - ma = _RE_AUTHOR.match(line) - if ma: - # you only have one commit_author field: join "Name " - name = ma.group("name").strip() - email = ma.group("email").strip() - author = f"{name} <{email}>" +# optional merge line we just skip +_RE_MERGE = re.compile(r"^Merge:\s+", re.I) + + +class GitLogSource(CommitSource): + def __init__(self, text: str) -> None: + self._text = text + + def iter_commits(self) -> Iterator[AnalyticsCommit]: + yield from self.yield_commits(self._text.splitlines()) + + @staticmethod + def yield_commits(lines: Iterable[str]) -> Iterator[AnalyticsCommit]: + sha: str | None = None + author_name: str | None = None + dt: datetime | None = None + + subject: str | None = None + in_headers = False + in_message = False + + ins_total = 0 + del_total = 0 + files_changed = 0 + + def flush(): + nonlocal sha, author_name, dt, subject, in_headers, in_message + nonlocal ins_total, del_total, files_changed + if not sha: + return + committed_dt = dt if dt is not None else datetime.fromtimestamp(0) + yield AnalyticsCommit( + sha=sha, + commit_author=author_name or "Unknown", + committed_datetime=committed_dt, + lines_insertions=ins_total, + lines_deletions=del_total, + files_changed=files_changed, + message=subject or "", + ) + sha = None + author_name = None + dt = None + subject = None + in_headers = False + in_message = False + ins_total = 0 + del_total = 0 + files_changed = 0 + + for raw in lines: + line = raw.rstrip("\n") + + m_commit = _RE_COMMIT.match(line) + if m_commit: + yield from flush() + sha = m_commit.group("sha") + in_headers = True + in_message = False continue - md = _RE_DATE.match(line) - if md: - dt = _parse_dt_iso(md.group("dt")) - continue + if sha and in_headers: + if _RE_MERGE.match(line): + continue - if line.strip() == "": - # empty line separates headers from commit message - headers_done = True - in_message_block = True - continue + m_author = _RE_AUTHOR.match(line) + if m_author: + author_name = m_author.group("name").strip() + continue - if sha and in_message_block: - # take the first non-empty line as subject - if line.strip(): - # if it's already a numstat line — then there's no message - mn = _RE_NUMSTAT.match(line) - if mn: - in_message_block = False - # don't continue — let it be processed as numstat below - else: - subject = line.strip() - # later lines may be body text — skip until first numstat + m_date = _RE_DATE.match(line) + if m_date: + dt_str = m_date.group("dt").strip() + dt = datetime.fromisoformat(dt_str) continue - else: - # ignore empty lines in the body - continue - if sha: - # numstat (may appear right after headers or after subject) - mn = _RE_NUMSTAT.match(line) - if mn: - ins_s, del_s = mn.group("ins"), mn.group("del") - ins = int(ins_s) if ins_s.isdigit() else 0 # '-' для бинарников - dels = int(del_s) if del_s.isdigit() else 0 - ins_total += ins - del_total += dels - files_changed += 1 - continue + if line.strip() == "": + in_headers = False + in_message = True + continue - # flush the last commit - yield from flush() + if sha and in_message: + if line.strip(): + if _RE_NUMSTAT.match(line): + in_message = False + else: + subject = line.strip() + continue + else: + continue + if sha: + m_ns = _RE_NUMSTAT.match(line) + if m_ns: + ins_s, del_s = m_ns.group("ins"), m_ns.group("del") + ins = int(ins_s) if ins_s.isdigit() else 0 + dels = int(del_s) if del_s.isdigit() else 0 + ins_total += ins + del_total += dels + files_changed += 1 + in_message = False + continue -def text_commits_to_analytics_commits(path_to_file: str) -> list[AnalyticsCommit]: - with open(path_to_file, "r", encoding="utf-8") as f: - return list(_yield_commits(f)) + yield from flush() diff --git a/git_analytics/static/index.html b/git_analytics/static/index.html index 9dbf806..ce28438 100644 --- a/git_analytics/static/index.html +++ b/git_analytics/static/index.html @@ -183,7 +183,7 @@

Other statistics

- © 2025 ver 0.1.10 + © 2025 ver 0.1.11
diff --git a/pyproject.toml b/pyproject.toml index f04e1aa..79d98fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "git-analytics" -version = "0.1.10" +version = "0.1.11" description = "Advanced analytics for Git repositories — commits, authors, code churn, lines of code, trends, and visual dashboards." authors = ["n0rfas "] license = "MIT"