Skip to content

Commit e461509

Browse files
authored
Merge pull request #34037 from teskje/builtin-item-migration-ignore-new-versions
adapter: gracefully handle newer versions in migration shard
2 parents 1c9c16c + aefcac1 commit e461509

File tree

1 file changed

+29
-10
lines changed

1 file changed

+29
-10
lines changed

src/adapter/src/catalog/open/builtin_item_migration.rs

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use mz_storage_client::controller::StorageTxn;
4040
use mz_storage_types::StorageDiff;
4141
use mz_storage_types::sources::SourceData;
4242
use timely::progress::{Antichain, Timestamp as TimelyTimestamp};
43-
use tracing::{debug, error, info};
43+
use tracing::{debug, error, info, warn};
4444

4545
use crate::catalog::open::builtin_item_migration::persist_schema::{TableKey, TableKeySchema};
4646
use crate::catalog::state::LocalExpressionCache;
@@ -402,10 +402,15 @@ async fn migrate_builtin_collections_incompatible(
402402
);
403403
let mut global_id_shards: BTreeMap<_, _> = snapshot
404404
.into_iter()
405-
.map(|((key, value), _ts, _diff)| {
406-
let table_key = key.expect("persist decoding error");
407-
let shard_id = value.expect("persist decoding error");
408-
(table_key, shard_id)
405+
.filter_map(|(data, _ts, _diff)| {
406+
if let (Ok(table_key), Ok(shard_id)) = data {
407+
Some((table_key, shard_id))
408+
} else {
409+
// If we can't decode the data, it has likely been written by a newer version, so
410+
// we ignore it.
411+
warn!("skipping unreadable migration shard entry: {data:?}");
412+
None
413+
}
409414
})
410415
.collect();
411416

@@ -415,11 +420,25 @@ async fn migrate_builtin_collections_incompatible(
415420
let storage_collection_metadata = txn.get_collection_metadata();
416421
for (table_key, shard_id) in global_id_shards.clone() {
417422
if table_key.build_version > build_version {
418-
halt!(
419-
"saw build version {}, which is greater than current build version {}",
420-
table_key.build_version,
421-
build_version
422-
);
423+
if read_only {
424+
halt!(
425+
"saw build version {}, which is greater than current build version {}",
426+
table_key.build_version,
427+
build_version
428+
);
429+
} else {
430+
// If we are in leader mode, and a newer (read-only) version has started a
431+
// migration, we must not allow ourselves to get fenced out! Continuing here might
432+
// confuse any read-only process running the migrations concurrently, but it's
433+
// better for the read-only env to crash than the leader.
434+
// TODO(#9755): handle this in a more principled way
435+
warn!(
436+
%table_key.build_version, %build_version,
437+
"saw build version which is greater than current build version",
438+
);
439+
global_id_shards.remove(&table_key);
440+
continue;
441+
}
423442
}
424443

425444
if !migrated_storage_collections.contains(&table_key.global_id)

0 commit comments

Comments
 (0)