From da2a9e1e45e3c82d0006375f10670b6cc1560a67 Mon Sep 17 00:00:00 2001 From: xing-yang Date: Tue, 23 Sep 2025 11:10:57 -0400 Subject: [PATCH] Change codes.ResourceExhausted from non-final to final error --- pkg/sidecar-controller/snapshot_controller.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pkg/sidecar-controller/snapshot_controller.go b/pkg/sidecar-controller/snapshot_controller.go index 73b8a2c88..f1b022fad 100644 --- a/pkg/sidecar-controller/snapshot_controller.go +++ b/pkg/sidecar-controller/snapshot_controller.go @@ -744,12 +744,19 @@ func isCSIFinalError(err error) bool { } switch st.Code() { case codes.Canceled, // gRPC: Client Application cancelled the request - codes.DeadlineExceeded, // gRPC: Timeout - codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous CreateSnapshot() may be still in progress. - codes.ResourceExhausted, // gRPC: Server temporarily out of resources - previous CreateSnapshot() may be still in progress. - codes.Aborted: // CSI: Operation pending for Snapshot + codes.DeadlineExceeded, // gRPC: Timeout + codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous CreateSnapshot() may be still in progress. + codes.Aborted: // CSI: Operation pending for Snapshot return false } + // Note: codes.ResourceExhausted is treated as a final error. + // gRPC: Server out of resources. + // However, it could also come from the transport layer ("message size exceeded"). + // The two cases cannot be told apart here; this needs to be resolved in the spec: + // https://github.com/container-storage-interface/spec/issues/419. + // For now we assume that message size limits are large enough that + // the error really comes from the CSI driver. + // All other errors mean that creating snapshot either did not // even start or failed. It is for sure not in progress. return true