diff --git a/jenkins/BuildDockerImage.groovy b/jenkins/BuildDockerImage.groovy index ca03be65b33..7af79088277 100644 --- a/jenkins/BuildDockerImage.groovy +++ b/jenkins/BuildDockerImage.groovy @@ -333,7 +333,7 @@ def buildImage(config, imageKeyToTag) } } - args += prepareWheelFromBuildStage(dockerfileStage, arch) + def buildWheelArgs = prepareWheelFromBuildStage(dockerfileStage, arch) // Avoid the frequency of OOM issue when building the wheel if (target == "trtllm") { if (arch == "x86_64") { @@ -346,15 +346,34 @@ def buildImage(config, imageKeyToTag) sh "env | sort" def randomSleep = (Math.random() * 600 + 600).toInteger() trtllm_utils.llmExecStepWithRetry(this, script: "docker pull ${TRITON_IMAGE}:${TRITON_BASE_TAG}", sleepInSecs: randomSleep, numRetries: 6, shortCommondRunTimeMax: 7200) - trtllm_utils.llmExecStepWithRetry(this, script: """ - cd ${LLM_ROOT} && make -C docker ${target}_${action} \ - BASE_IMAGE=${BASE_IMAGE} \ - TRITON_IMAGE=${TRITON_IMAGE} \ - TORCH_INSTALL_TYPE=${torchInstallType} \ - IMAGE_WITH_TAG=${imageWithTag} \ - STAGE=${dockerfileStage} \ - BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} - """, sleepInSecs: randomSleep, numRetries: 6, shortCommondRunTimeMax: 7200) + try { + trtllm_utils.llmExecStepWithRetry(this, script: """ + cd ${LLM_ROOT} && make -C docker ${target}_${action} \ + BASE_IMAGE=${BASE_IMAGE} \ + TRITON_IMAGE=${TRITON_IMAGE} \ + TORCH_INSTALL_TYPE=${torchInstallType} \ + IMAGE_WITH_TAG=${imageWithTag} \ + STAGE=${dockerfileStage} \ + BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} ${buildWheelArgs} + """, sleepInSecs: randomSleep, numRetries: 6, shortCommondRunTimeMax: 7200) + } catch (InterruptedException ex) { + throw ex + } catch (Exception ex) { + if (buildWheelArgs.trim().isEmpty()) { + throw ex + } + echo "Build failed with wheel arguments, retrying without them" + buildWheelArgs = "" + trtllm_utils.llmExecStepWithRetry(this, script: """ + cd ${LLM_ROOT} && make -C docker ${target}_${action} \ + BASE_IMAGE=${BASE_IMAGE} \ + TRITON_IMAGE=${TRITON_IMAGE} \ + TORCH_INSTALL_TYPE=${torchInstallType} \ + IMAGE_WITH_TAG=${imageWithTag} \ + STAGE=${dockerfileStage} \ + BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} ${buildWheelArgs} + """, sleepInSecs: randomSleep, numRetries: 2, shortCommondRunTimeMax: 7200) + } if (target == "ngc-release") { imageKeyToTag["NGC Release Image ${config.arch}"] = imageWithTag } @@ -369,7 +388,7 @@ def buildImage(config, imageKeyToTag) TORCH_INSTALL_TYPE=${torchInstallType} \ IMAGE_WITH_TAG=${customImageWithTag} \ STAGE=${dockerfileStage} \ - BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} + BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} ${buildWheelArgs} """ } }