diff --git a/common/retry.go b/common/retry.go index 1552fa052e..2e2577813e 100644 --- a/common/retry.go +++ b/common/retry.go @@ -862,6 +862,16 @@ func Retry(ctx context.Context, request OCIRetryableRequest, operation OCIOperat // use a one-based counter because it's easier to think about operation retry in terms of attempt numbering for currentOperationAttempt := uint(1); shouldContinueIssuingRequests(currentOperationAttempt, policyToUse.MaximumNumberAttempts); currentOperationAttempt++ { + // Check if context was cancelled before starting this attempt + select { + case <-ctx.Done(): + // Context cancelled: return whatever response we have so far + retrierChannel <- retrierResult{response, ctx.Err()} + return + default: + // Continue with the attempt + } + Debugln(fmt.Sprintf("operation attempt #%v", currentOperationAttempt)) // rewind body once needed if isSeekable { @@ -894,18 +904,24 @@ func Retry(ctx context.Context, request OCIRetryableRequest, operation OCIOperat return } Debugln(fmt.Sprintf("waiting %v before retrying operation", duration)) - // sleep before retrying the operation - <-time.After(duration) + // sleep before retrying the operation, but respect context cancellation + select { + case <-ctx.Done(): + // Context cancelled during sleep: return current response + retrierChannel <- retrierResult{response, ctx.Err()} + return + case <-time.After(duration): + // Sleep completed, continue to next retry attempt + } } retryEndTime := time.Now() Debugln(fmt.Sprintf("Total Latency for this API call is: %v ms", retryEndTime.Sub(retryStartTime).Milliseconds())) retrierChannel <- retrierResult{response, err} }() - select { - case <-ctx.Done(): - return response, ctx.Err() - case result := <-retrierChannel: - return result.response, result.err - } + // Wait for the goroutine to complete and send result through channel. + // The goroutine now checks ctx.Done() internally and will exit promptly + // when context is cancelled, so we don't need to race here. + result := <-retrierChannel + return result.response, result.err } diff --git a/common/retry_race_test.go b/common/retry_race_test.go new file mode 100644 index 0000000000..047bd8f4a1 --- /dev/null +++ b/common/retry_race_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2016, 2018, 2025, Oracle and/or its affiliates. All rights reserved. +// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license. + +package common + +import ( + "context" + "net/http" + "net/url" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +// TestRetry_RaceCondition demonstrates the data race bug in v65.101.1 +// +// To reproduce the race condition, run: +// +// go test -race -run TestRetry_RaceCondition ./common/ +// +// Expected output with BUGGY code: +// +// WARNING: DATA RACE +// Write at 0x... by goroutine X: +// common/retry.go:871 +// Previous read at 0x... by goroutine Y: +// common/retry.go:907 +// +// The race occurs because: +// 1. Line 807: var response OCIResponse (shared variable) +// 2. Line 871: Goroutine writes to 'response' +// 3. Line 907: Main thread reads 'response' when context is cancelled +// +// This creates a data race where both threads access the same memory +// location concurrently without synchronization. +func TestRetry_RaceCondition(t *testing.T) { + shouldRetryOperation := func(res OCIOperationResponse) bool { return true } + getNextDuration := func(res OCIOperationResponse) time.Duration { return 100 * time.Millisecond } + + pol := NewRetryPolicy(uint(5), shouldRetryOperation, getNextDuration) + req := retryableOCIRequest{retryPolicy: &pol} + + testURL, _ := url.Parse("http://example.com/test") + fakeOperation := func(ctx context.Context, request OCIRequest, _ *OCIReadSeekCloser, _ map[string]string) (OCIResponse, error) { + // Simulate slow operation to increase chance of race + time.Sleep(50 * time.Millisecond) + + httpResponse := http.Response{ + Header: http.Header{}, + StatusCode: 500, + Request: &http.Request{ + Method: "GET", + URL: testURL, + }, + } + return genericOCIResponse{RawResponse: &httpResponse}, nil + } + + // Cancel context immediately to trigger the race condition + // This causes the main thread to read 'response' at line 907 + // while the goroutine is writing to it at line 871 + ctx, cancelFn := context.WithCancel(context.Background()) + cancelFn() + + _, err := Retry(ctx, req, fakeOperation, pol) + + // Test may pass, but race detector will catch the bug + assert.Equal(t, context.Canceled, err) +} \ No newline at end of file