Skip to content

Commit 60dfd18

Browse files
create/info: fix discrepancies in archive stats, fixes borgbackup#8898, borgbackup#9003
Do not account for archive metadata; count only file contents.
1 parent 6a826ef commit 60dfd18

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

src/borg/archive.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -469,7 +469,10 @@ def __init__(self, repository, key, manifest, name, cache=None, create=False,
469469
self.pipeline = DownloadPipeline(self.repository, self.key)
470470
self.create = create
471471
if self.create:
472-
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
472+
# Use a separate statistics counter for metadata (items, archive metadata),
473+
# so that archive.stats reflects only file content statistics.
474+
self.meta_stats = Statistics(output_json=False, iec=iec)
475+
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.meta_stats)
473476
if name in manifest.archives:
474477
raise self.AlreadyExists(name)
475478
i = 0
@@ -605,7 +608,8 @@ def add_item(self, item, show_progress=True, stats=None):
605608
def write_checkpoint(self):
606609
self.save(self.checkpoint_name)
607610
del self.manifest.archives[self.checkpoint_name]
608-
self.cache.chunk_decref(self.id, self.stats)
611+
# Use meta_stats so metadata chunks do not affect archive.stats
612+
self.cache.chunk_decref(self.id, self.meta_stats if hasattr(self, 'meta_stats') else self.stats)
609613

610614
def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
611615
name = name or self.name
@@ -649,7 +653,8 @@ def save(self, name=None, comment=None, timestamp=None, stats=None, additional_m
649653
data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b'archive')
650654
self.id = self.key.id_hash(data)
651655
try:
652-
self.cache.add_chunk(self.id, data, self.stats)
656+
# Use meta_stats so metadata chunk addition does not skew archive.stats
657+
self.cache.add_chunk(self.id, data, self.meta_stats if hasattr(self, 'meta_stats') else self.stats)
653658
except IntegrityError as err:
654659
err_msg = str(err)
655660
# hack to avoid changing the RPC protocol by introducing new (more specific) exception class
@@ -687,21 +692,23 @@ def _calc_stats(self, cache, want_unique=True):
687692
if have_borg12_meta and not want_unique:
688693
unique_csize = 0
689694
else:
690-
def add(id):
691-
entry = cache.chunks[id]
692-
archive_index.add(id, 1, entry.size, entry.csize)
693695

694696
archive_index = ChunkIndex()
695697
sync = CacheSynchronizer(archive_index)
696-
add(self.id)
698+
# do NOT add the archive metadata chunk (self.id) here.
699+
# The metadata chunk is accounted via meta_stats during creation and must not
700+
# contribute to the "This archive" deduplicated size computed by borg info.
701+
# See issue #9003: make info's deduplicated size match create-time stats.
702+
697703
# we must escape any % char in the archive name, because we use it in a format string, see #6500
698704
arch_name_escd = self.name.replace('%', '%%')
699705
pi = ProgressIndicatorPercent(total=len(self.metadata.items),
700706
msg='Calculating statistics for archive %s ... %%3.0f%%%%' % arch_name_escd,
701707
msgid='archive.calc_stats')
702708
for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
703709
pi.show(increase=1)
704-
add(id)
710+
# do NOT add(id) here, this is a metadata stream chunk and should not
711+
# be accounted for in stats, see comment above.
705712
data = self.key.decrypt(id, chunk)
706713
sync.feed(data)
707714
unique_csize = archive_index.stats_against(cache.chunks)[3]

0 commit comments

Comments
 (0)