@@ -454,6 +454,8 @@ type options struct {
454454 metricsAgent * metrics.MetricsAgent
455455
456456 skippedImages sets.Set [string ]
457+
458+ successReported bool
457459}
458460
459461func bindOptions (flag * flag.FlagSet ) * options {
@@ -919,6 +921,11 @@ func (o *options) Report(errs ...error) {
919921 }
920922
921923 if len (errorToReport ) == 0 {
924+ // Skip reporting success if it was already reported early (before post steps)
925+ if o .successReported {
926+ logrus .Debug ("Success was already reported early, skipping duplicate report." )
927+ return
928+ }
922929 reporter .Report (nil )
923930 }
924931}
@@ -1057,26 +1064,56 @@ func (o *options) Run() []error {
10571064 return wrapped
10581065 }
10591066
1060- // Run each of the promotion steps concurrently
1067+ // Main graph completed successfully - report success immediately before post steps
1068+ mainGraphCompletedAt := time .Now ()
1069+ mainGraphDuration := mainGraphCompletedAt .Sub (start )
1070+ eventRecorder .Event (runtimeObject , coreapi .EventTypeNormal , "CiJobSucceeded" , eventJobDescription (o .jobSpec , o .namespace ))
1071+
1072+ // Report success to users immediately (post steps are best-effort cleanup)
1073+ reporter , loadErr := o .resultsOptions .Reporter (o .jobSpec , o .consoleHost )
1074+ if loadErr != nil {
1075+ logrus .WithError (loadErr ).Warn ("Could not load result reporting options, skipping early success report." )
1076+ } else {
1077+ reporter .Report (nil )
1078+ o .successReported = true
1079+ }
1080+
1081+ // Run each of the promotion steps concurrently (best-effort cleanup)
1082+ postStepsStart := time .Now ()
10611083 lenOfPromotionSteps := len (promotionSteps )
10621084 detailsChan := make (chan api.CIOperatorStepDetails , lenOfPromotionSteps )
10631085 errChan := make (chan error , lenOfPromotionSteps )
10641086 for _ , step := range promotionSteps {
10651087 go runPromotionStep (ctx , step , detailsChan , errChan , o .metricsAgent )
10661088 }
1089+ postStepsFailed := false
10671090 for i := 0 ; i < lenOfPromotionSteps ; i ++ {
10681091 select {
10691092 case details := <- detailsChan :
10701093 graph .MergeFrom (details )
10711094 case err := <- errChan :
10721095 errorDesc := fmt .Sprintf ("post step failed while %s. with error: %v" , eventJobDescription (o .jobSpec , o .namespace ), err )
10731096 eventRecorder .Event (runtimeObject , coreapi .EventTypeWarning , "PostStepFailed" , errorDesc )
1074- return []error {results .ForReason ("executing_post" ).WithError (err ).Unwrap ()} // If any of the promotion steps fail, it is considered a failure
1097+ logrus .WithError (err ).Warn ("Post step failed, but job success was already reported. Continuing with cleanup." )
1098+ postStepsFailed = true
1099+ // Post step failures don't affect job success (already reported), but we still record them
10751100 }
10761101 }
10771102
1078- eventRecorder .Event (runtimeObject , coreapi .EventTypeNormal , "CiJobSucceeded" , eventJobDescription (o .jobSpec , o .namespace ))
1079- o .metricsAgent .Record (metrics .NewInsightsEvent (metrics .InsightExecutionCompleted , metrics.Context {"duration_seconds" : time .Since (start ).Seconds (), "success" : true }))
1103+ // Record final metrics including post steps duration
1104+ postStepsDuration := time .Since (postStepsStart )
1105+ totalDuration := time .Since (start )
1106+ metricsContext := metrics.Context {
1107+ "duration_seconds" : totalDuration .Seconds (),
1108+ "main_graph_duration_seconds" : mainGraphDuration .Seconds (),
1109+ "post_steps_duration_seconds" : postStepsDuration .Seconds (),
1110+ "time_saved_seconds" : postStepsDuration .Seconds (),
1111+ "success" : true ,
1112+ }
1113+ if postStepsFailed {
1114+ metricsContext ["post_steps_failed" ] = true
1115+ }
1116+ o .metricsAgent .Record (metrics .NewInsightsEvent (metrics .InsightExecutionCompleted , metricsContext ))
10801117
10811118 return nil
10821119 })
0 commit comments