Skip to content

Commit 0fa6f57

Browse files
info: adapt deduplicated archive size calculation to match borg create stats
also: add a test that "borg info" and "borg create --stats" report the same "This archive" deduplicated size.
1 parent f85d839 commit 0fa6f57

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

src/borg/archive.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -681,21 +681,23 @@ def _calc_stats(self, cache, want_unique=True):
681681
if have_borg12_meta and not want_unique:
682682
unique_csize = 0
683683
else:
684-
def add(id):
685-
entry = cache.chunks[id]
686-
archive_index.add(id, 1, entry.size, entry.csize)
687684

688685
archive_index = ChunkIndex()
689686
sync = CacheSynchronizer(archive_index)
690-
add(self.id)
687+
# do NOT add the archive metadata chunk (self.id) here.
688+
# The metadata chunk is accounted via meta_stats during creation and must not
689+
# contribute to the "This archive" deduplicated size computed by borg info.
690+
# See issue #9003: make info's deduplicated size match create-time stats.
691+
691692
# we must escape any % char in the archive name, because we use it in a format string, see #6500
692693
arch_name_escd = self.name.replace('%', '%%')
693694
pi = ProgressIndicatorPercent(total=len(self.metadata.items),
694695
msg='Calculating statistics for archive %s ... %%3.0f%%%%' % arch_name_escd,
695696
msgid='archive.calc_stats')
696697
for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
697698
pi.show(increase=1)
698-
add(id)
699+
# do NOT add this id to archive_index here: it is a metadata stream chunk and
700+
# must not be accounted for in the stats, see comment above.
699701
data = self.key.decrypt(id, chunk)
700702
sync.feed(data)
701703
unique_csize = archive_index.stats_against(cache.chunks)[3]

src/borg/testsuite/archiver.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,27 @@ def test_info(self):
16171617
info_archive = self.cmd('info', '--first', '1', self.repository_location)
16181618
assert 'Archive name: test\n' in info_archive
16191619

1620+
def test_info_matches_create_deduplicated_size(self):
1621+
# Create two identical files to ensure intra-archive deduplication happens,
1622+
# so the deduplicated size is visibly different from the compressed size.
1623+
data = b'X' * (1024 * 80)
1624+
self.create_regular_file('file1', contents=data)
1625+
self.create_regular_file('file2', contents=data)
1626+
self.cmd('init', '--encryption=repokey', self.repository_location)
1627+
create_out = self.cmd('create', '--stats', self.repository_location + '::test', 'input')
1628+
info_out = self.cmd('info', self.repository_location + '::test')
1629+
1630+
import re
1631+
1632+
def get_dedup(s):
1633+
m = re.search(r'^This archive:\s+(.*?)\s+(.*?)\s+(.+)$', s, re.M)
1634+
assert m is not None, s
1635+
return m.group(3).strip()
1636+
1637+
dedup_create = get_dedup(create_out)
1638+
dedup_info = get_dedup(info_out)
1639+
assert dedup_create == dedup_info
1640+
16201641
def test_info_json(self):
16211642
self.create_regular_file('file1', size=1024 * 80)
16221643
self.cmd('init', '--encryption=repokey', self.repository_location)

0 commit comments

Comments
 (0)