From 8c66da40fb74695bb7aab8d092e1d5d16258ebfe Mon Sep 17 00:00:00 2001 From: Sylvain Beucler Date: Sun, 29 Jun 2025 10:44:31 +0200 Subject: [PATCH] filter-repo: reduce export dump differences While the order of files passed to `git fast-import` does not matter, we can help users inspect --dry-run results if we can match the order that fast-export uses. fast-export uses a depth-first sorting order to ensure that entries underneath a directory would appear before the directory or some kind of file or symlink that has replaced a directory. However, its depth-first ordering is implemented in a way that affects things besides directories becoming symlinks or files -- in contrast to normal lexicographic ordering where a prefix would sort before a longer string, fast-export reverses that. This means that when we try to inspect differences after a --dry-run, we may see a spurious difference such as: ``` $ ./git-filter-repo --dry-run --proceed $ diff -u .git/filter-repo/fast-export.original .git/filter-repo/fast-export.filtered ... @@ -1451,25 +1329,23 @@ D testcases/expected/case1-twenty D testcases/inputs/case1 M 100755 0a13abf13c87a13ae56c1fc664baa8b44bdac1de testcases/t9390-repo-filter.sh +M 100644 de3799fa8cd84a9ac56fad023ddf3da44eb444bb testcases/t9390/case1 M 100644 e0c88454095bd887852791401a3b2336ad47d012 testcases/t9390/case1-filename M 100644 a1aa78ff11f3a40cafdcd9b531e8c1342e8a75e5 testcases/t9390/case1-ten M 100644 488cbd9d101b107809611940d1cd00a8f72e1b17 testcases/t9390/case1-twenty -M 100644 de3799fa8cd84a9ac56fad023ddf3da44eb444bb testcases/t9390/case1 ``` Fix this by modifying filter-repo to use the same sorting of paths as fast-export when --dry-run is specified. 
Signed-off-by: Sylvain Beucler --- git-filter-repo | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/git-filter-repo b/git-filter-repo index 39c8680a..149f6d8c 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -33,6 +33,7 @@ operations; however: import argparse import collections import fnmatch +import functools import gettext import io import os @@ -3942,7 +3943,29 @@ class RepoFilter(object): continue # Otherwise, record the change new_file_changes[change.filename] = change - commit.file_changes = [v for k,v in sorted(new_file_changes.items())] + + # Use the git fast-export sorting algorithm for filenames + # https://github.com/git/git/blob/14de3eb34435db79c6e7edc8082c302a26a8330a/builtin/fast-export.c#L444-L448 + def depth_first(a, b): + fn_a = a[0] + fn_b = b[0] + + # Sort 'd/e' before 'd' + # first compare common length, then if equal give priority to longer one + min_len = min(len(fn_a), len(fn_b)) + # memcmp equivalent https://docs.python.org/3.0/whatsnew/3.0.html#ordering-comparisons + cmp = (fn_a[:min_len] > fn_b[:min_len]) - (fn_a[:min_len] < fn_b[:min_len]) + if cmp != 0: # different content + return cmp # return normal comparison + cmp = len(fn_b) - len(fn_a) + if cmp != 0: # different size + return cmp # longer one first + + # 'R' (rename) entries last + cmp = (a[1].type == 'R') - (b[1].type == 'R') + return cmp + commit.file_changes = [v for k,v in sorted(new_file_changes.items(), + key=functools.cmp_to_key(depth_first))] def _tweak_commit(self, commit, aux_info): if self._args.replace_message: