Skip to content

Commit dd5c458

Browse files
authored
Add retries in case jenkins job has a build already enqueued (#1203)
Add a retry mechanism when triggering a new build of a Jenkins job. The retry mechanism also supports exponential backoff between attempts.
1 parent 74a268e commit dd5c458

File tree

4 files changed

+95
-11
lines changed

4 files changed

+95
-11
lines changed

.buildkite/pipeline.package-storage-publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ steps:
1919
command: ".buildkite/scripts/signAndPublishPackage.sh"
2020
depends_on:
2121
- build-package
22-
timeout_in_minutes: 30
22+
timeout_in_minutes: 90
2323
agents:
2424
provider: "gcp"
2525
image: family/core-ubuntu-2004

.buildkite/scripts/triggerJenkinsJob/jenkins/jenkins.go

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ type JenkinsClient struct {
1717
client *gojenkins.Jenkins
1818
}
1919

20+
// Options carries the retry settings that RunJob forwards to the retry helper
// when triggering a build.
type Options struct {
	// WaitingTime is the base delay between attempts; MaxWaitingTime is the
	// upper bound applied to every computed delay.
	WaitingTime, MaxWaitingTime time.Duration
	// GrowthFactor is the base of the exponential backoff: the delay after
	// the r-th failed attempt (counting from zero) is WaitingTime * GrowthFactor^r.
	GrowthFactor float64
	// Retries is the number of additional attempts made after the first one fails.
	Retries int
}
26+
2027
func NewJenkinsClient(ctx context.Context, host, user, token string) (*JenkinsClient, error) {
2128
jenkins, err := gojenkins.CreateJenkins(nil, host, user, token).Init(ctx)
2229
if err != nil {
@@ -28,12 +35,28 @@ func NewJenkinsClient(ctx context.Context, host, user, token string) (*JenkinsCl
2835
}, nil
2936
}
3037

31-
func (j *JenkinsClient) RunJob(ctx context.Context, jobName string, async bool, params map[string]string) error {
32-
queueId, err := j.client.BuildJob(ctx, jobName, params)
33-
if err != nil {
34-
fmt.Printf("error running job %s : %s\n", jobName, err)
38+
func (j *JenkinsClient) RunJob(ctx context.Context, jobName string, async bool, params map[string]string, opts Options) error {
39+
log.Printf("Building job %s", jobName)
40+
var queueId int64
41+
42+
r := retry(func(ctx context.Context) error {
43+
var err error
44+
queueId, err = j.client.BuildJob(ctx, jobName, params)
45+
if err != nil {
46+
return fmt.Errorf("error running job %s: %w", jobName, err)
47+
}
48+
49+
if queueId != 0 {
50+
return nil
51+
}
52+
return fmt.Errorf("already running %s?", jobName)
53+
54+
}, opts.Retries, opts.GrowthFactor, opts.WaitingTime, opts.MaxWaitingTime)
55+
56+
if err := r(ctx); err != nil {
3557
return err
3658
}
59+
3760
build, err := j.getBuildFromJobAndQueueID(ctx, jobName, queueId)
3861
if err != nil {
3962
return err
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package jenkins
6+
7+
import (
8+
"context"
9+
"log"
10+
"math"
11+
"time"
12+
)
13+
14+
type retryableFunction func(context.Context) error
15+
16+
// minDuration returns the smaller of the two durations.
func minDuration(a, b time.Duration) time.Duration {
	if b <= a {
		return b
	}
	return a
}
22+
23+
func retry(f retryableFunction, retries int, growthFactor float64, delay, maxDelay time.Duration) retryableFunction {
24+
return func(ctx context.Context) error {
25+
delaySeconds := delay.Seconds()
26+
for r := 0; ; r++ {
27+
err := f(ctx)
28+
if err == nil || r >= retries {
29+
// Return when there is no error or the maximum amount
30+
// of retries is reached.
31+
return err
32+
}
33+
34+
waitingTimeSeconds := math.Pow(growthFactor, float64(r)) * delaySeconds
35+
waitingTime := time.Duration(waitingTimeSeconds) * time.Second
36+
waitingTime = minDuration(waitingTime, maxDelay)
37+
38+
log.Printf("Function failed, retrying in %v -> %.2f", waitingTime, waitingTimeSeconds)
39+
40+
select {
41+
case <-time.After(waitingTime):
42+
case <-ctx.Done():
43+
return ctx.Err()
44+
}
45+
}
46+
return nil
47+
}
48+
}

.buildkite/scripts/triggerJenkinsJob/main.go

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"log"
1212
"os"
1313
"strings"
14+
"time"
1415

1516
"github.com/elastic/trigger-jenkins-buildkite-plugin/jenkins"
1617
)
@@ -44,6 +45,11 @@ func jenkinsJobOptions() []string {
4445

4546
func main() {
4647
jenkinsJob := flag.String("jenkins-job", "", fmt.Sprintf("Jenkins job to trigger. Allowed values: %s", strings.Join(jenkinsJobOptions(), " ,")))
48+
waitingTime := flag.Duration("waiting-time", 5*time.Second, fmt.Sprintf("Waiting period between each retry"))
49+
growthFactor := flag.Float64("growth-factor", 1.25, fmt.Sprintf("Growth-Factor used for exponential backoff delays"))
50+
retries := flag.Int("retries", 20, fmt.Sprintf("Number of retries to trigger the job"))
51+
maxWaitingTime := flag.Duration("max-waiting-time", 60*time.Minute, fmt.Sprintf("Maximum waiting time per each retry"))
52+
4753
folderPath := flag.String("folder", "", "Path to artifacts folder")
4854
zipPackagePath := flag.String("package", "", "Path to zip package file (*.zip)")
4955
sigPackagePath := flag.String("signature", "", "Path to the signature file of the package file (*.zip.sig)")
@@ -62,11 +68,18 @@ func main() {
6268
log.Fatalf("error creating jenkins client")
6369
}
6470

71+
opts := jenkins.Options{
72+
WaitingTime: *waitingTime,
73+
Retries: *retries,
74+
GrowthFactor: *growthFactor,
75+
MaxWaitingTime: *maxWaitingTime,
76+
}
77+
6578
switch *jenkinsJob {
6679
case publishJobKey:
67-
err = runPublishingRemoteJob(ctx, client, *async, allowedJenkinsJobs[*jenkinsJob], *zipPackagePath, *sigPackagePath)
80+
err = runPublishingRemoteJob(ctx, client, *async, allowedJenkinsJobs[*jenkinsJob], *zipPackagePath, *sigPackagePath, opts)
6881
case signJobKey:
69-
err = runSignPackageJob(ctx, client, *async, allowedJenkinsJobs[*jenkinsJob], *folderPath)
82+
err = runSignPackageJob(ctx, client, *async, allowedJenkinsJobs[*jenkinsJob], *folderPath, opts)
7083
default:
7184
log.Fatal("unsupported jenkins job")
7285
}
@@ -76,18 +89,18 @@ func main() {
7689
}
7790
}
7891

79-
func runSignPackageJob(ctx context.Context, client *jenkins.JenkinsClient, async bool, jobName, folderPath string) error {
92+
func runSignPackageJob(ctx context.Context, client *jenkins.JenkinsClient, async bool, jobName, folderPath string, opts jenkins.Options) error {
8093
if folderPath == "" {
8194
return fmt.Errorf("missing parameter --gcs_input_path for")
8295
}
8396
params := map[string]string{
8497
"gcs_input_path": folderPath,
8598
}
8699

87-
return client.RunJob(ctx, jobName, async, params)
100+
return client.RunJob(ctx, jobName, async, params, opts)
88101
}
89102

90-
func runPublishingRemoteJob(ctx context.Context, client *jenkins.JenkinsClient, async bool, jobName, packagePath, signaturePath string) error {
103+
func runPublishingRemoteJob(ctx context.Context, client *jenkins.JenkinsClient, async bool, jobName, packagePath, signaturePath string, opts jenkins.Options) error {
91104
if packagePath == "" {
92105
return fmt.Errorf("missing parameter --gs_package_build_zip_path")
93106
}
@@ -102,5 +115,5 @@ func runPublishingRemoteJob(ctx context.Context, client *jenkins.JenkinsClient,
102115
"gs_package_signature_path": signaturePath,
103116
}
104117

105-
return client.RunJob(ctx, jobName, async, params)
118+
return client.RunJob(ctx, jobName, async, params, opts)
106119
}

0 commit comments

Comments
 (0)