From fc68a31b81046d1013432037ac113c60f0b2ffaa Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Mon, 25 Aug 2025 21:32:54 -0700 Subject: [PATCH 001/135] Update the project dependencies for the lldb-remote-linux builders. Add additional dependencies with the libc++/libc++abi/libunwind and compiler-rt libraries. Also removed unaffected LLDB_CAN_USE_LLDB_SERVER options. --- buildbot/osuosl/master/config/builders.py | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index b92c48e76..53f9169a7 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3640,7 +3640,20 @@ 'workernames': ["as-builder-9"], 'builddir': "lldb-remote-linux-ubuntu", 'factory': UnifiedTreeBuilder.getCmakeExBuildFactory( - depends_on_projects = ["llvm", "clang", "lld", "lldb"], + depends_on_projects = [ + 'llvm', + 'compiler-rt', + 'clang', + 'libunwind', + 'libcxx', + 'libcxxabi', + 'lld', + 'lldb', + ], + # Allow only these projects with LLVM_ENABLE_PROJECTS. + enable_projects = ["llvm", "clang", "lld", "lldb"], + # Use a proper list of runtimes (LLVM_ENABLE_RUNTIMES) from CrossWinToARMLinux.cmake. + # Avoid making it from a list of the depended projects. enable_runtimes = None, checks = [ "check-lldb-unit", @@ -3678,9 +3691,6 @@ "LLDB_ENABLE_CURSES" : "OFF", "LLDB_ENABLE_LZMA" : "OFF", "LLDB_ENABLE_LIBXML2" : "OFF", - # No need to build lldb-server during the first stage. - # We are going to build it for the target platform later. 
- "LLDB_CAN_USE_LLDB_SERVER" : "OFF", "LLDB_TEST_USER_ARGS" : util.Interpolate( "--env;ARCH_CFLAGS=-mcpu=cortex-a78;" \ "--platform-name;remote-linux;" \ @@ -3769,7 +3779,20 @@ 'workernames': ["as-builder-10"], 'builddir': "lldb-x-aarch64", 'factory': UnifiedTreeBuilder.getCmakeExBuildFactory( - depends_on_projects = ["llvm", "clang", "lld", "lldb"], + depends_on_projects = [ + 'llvm', + 'compiler-rt', + 'clang', + 'libunwind', + 'libcxx', + 'libcxxabi', + 'lld', + 'lldb', + ], + # Allow only these projects with LLVM_ENABLE_PROJECTS. + enable_projects = ["llvm", "clang", "lld", "lldb"], + # Use a proper list of runtimes (LLVM_ENABLE_RUNTIMES) from CrossWinToARMLinux.cmake. + # Avoid making it from a list of the depended projects. enable_runtimes = None, checks = [ "check-lldb-unit", @@ -3806,9 +3829,6 @@ "LLDB_ENABLE_CURSES" : "OFF", "LLDB_ENABLE_LZMA" : "OFF", "LLDB_ENABLE_LIBXML2" : "OFF", - # No need to build lldb-server during the first stage. - # We are going to build it for the target platform later. - "LLDB_CAN_USE_LLDB_SERVER" : "OFF", "LLDB_TEST_USER_ARGS" : util.Interpolate( "--env;ARCH_CFLAGS=-mcpu=cortex-a78;" \ "--platform-name;remote-linux;" \ From 69853114abf052ccabe7061ef201b7bea20b2712 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 23 Jul 2025 09:13:58 -0700 Subject: [PATCH 002/135] [CI] Disable Soft Delete on GCS Buckets This patch disable soft deletion on the GCS Buckets used to cache object files. Soft deletion is a feature on GCS buckets where when files are deleted they are kept around for some duration of time (~7 days by default) in a "soft" deleted state before being fully deleted. Given we do not really lose anything by deleting object files and can easily regenerate them, it does not make sense to pay for storing them for an additional seven days. By the end of the time they will be long outdated. 
Reviewers: dschuff, cmtice, lnihlen, gburgessiv, Keenuts Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/518 --- premerge/gke_cluster/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 832d23c4f..b556fb265 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -171,6 +171,10 @@ resource "google_storage_bucket" "object_cache_linux" { uniform_bucket_level_access = true public_access_prevention = "enforced" + + soft_delete_policy { + retention_duration_seconds = 0 + } } resource "google_storage_bucket" "object_cache_windows" { @@ -179,6 +183,10 @@ resource "google_storage_bucket" "object_cache_windows" { uniform_bucket_level_access = true public_access_prevention = "enforced" + + soft_delete_policy { + retention_duration_seconds = 0 + } } resource "google_service_account" "object_cache_linux_gsa" { From 10a4227bd8c352b1f26ddde86698b788892a3a6d Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 23 Jul 2025 09:14:33 -0700 Subject: [PATCH 003/135] [CI] Add Lifecycle Rules to GCS Buckets This patch adds lifecycle rules to the GCS buckets to delete old objects. We do not want to keep object files around for very long as they quickly become out of date. GCS does not keep track of the last time a file was fetched, so we have to rely on the creation date. The granularity for when to delete an object can only be specified in days, so we set it to one day for now. 
Reviewers: dschuff, lnihlen, cmtice, gburgessiv, Keenuts Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/519 --- premerge/gke_cluster/main.tf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index b556fb265..47e70fbf0 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -175,6 +175,15 @@ resource "google_storage_bucket" "object_cache_linux" { soft_delete_policy { retention_duration_seconds = 0 } + + lifecycle_rule { + action { + type = "Delete" + } + condition { + days_since_noncurrent_time = 1 + } + } } resource "google_storage_bucket" "object_cache_windows" { @@ -187,6 +196,15 @@ resource "google_storage_bucket" "object_cache_windows" { soft_delete_policy { retention_duration_seconds = 0 } + + lifecycle_rule { + action { + type = "Delete" + } + condition { + days_since_noncurrent_time = 1 + } + } } resource "google_service_account" "object_cache_linux_gsa" { From f530dc78a6182a05469cd785af23a5701eedded6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 23 Jul 2025 16:17:09 +0000 Subject: [PATCH 004/135] [CI] Use age Instead of days_since_noncurrent_time I was apparently following the example from the documentation too closely. We do not want to use days_since_noncurrent_time since it only applies to versioned objects which we do not have. Use age like I originally intended. 
--- premerge/gke_cluster/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 47e70fbf0..2294c21e0 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -181,7 +181,7 @@ resource "google_storage_bucket" "object_cache_linux" { type = "Delete" } condition { - days_since_noncurrent_time = 1 + age = 1 } } } @@ -202,7 +202,7 @@ resource "google_storage_bucket" "object_cache_windows" { type = "Delete" } condition { - days_since_noncurrent_time = 1 + age = 1 } } } From 6ea03ec2e07b45fcc19a4ce9e20c8e4410942dc2 Mon Sep 17 00:00:00 2001 From: dyung Date: Wed, 23 Jul 2025 16:32:19 -0400 Subject: [PATCH 005/135] Add personal email to the list of notifications for my bots. (#515) --- buildbot/osuosl/master/config/status.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index fa5c530a6..99d9f159d 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -343,7 +343,8 @@ def getReporters(): reporters.MailNotifier( fromaddr = status_email_fromaddr, sendToInterestedUsers = False, - extraRecipients = ["douglas.yung@sony.com"], + extraRecipients = ["douglas.yung@sony.com", + "douglasyung.llvm@gmail.com" ], generators = [ utils.LLVMDefaultBuildStatusGenerator( builders = [ From 42bb0bf8f738fcc001d134ad0118a6e257bd8950 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Wed, 23 Jul 2025 14:17:18 -0700 Subject: [PATCH 006/135] Remove libc arm32 buildbot (#517) Remove the arm32 buildbot as it has been replaced with the qemu arm32 buildbot. 
--- buildbot/osuosl/master/config/builders.py | 9 --------- buildbot/osuosl/master/config/status.py | 1 - buildbot/osuosl/master/config/workers.py | 3 --- 3 files changed, 13 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 53f9169a7..dae5da4a1 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -2321,15 +2321,6 @@ depends_on_projects=['llvm', 'libc', 'clang', 'clang-tools-extra'], extra_args=['--debug'])}, - {'name' : 'libc-arm32-debian-dbg', - 'tags' : ["libc"], - 'workernames' : ['libc-arm32-debian'], - 'builddir': 'libc-arm32-debian-dbg', - 'factory' : AnnotatedBuilder.getAnnotatedBuildFactory( - script="libc-linux.py", - depends_on_projects=['llvm', 'libc', 'clang', 'clang-tools-extra'], - extra_args=['--debug'])}, - {'name' : 'libc-arm32-qemu-debian-dbg', 'tags' : ["libc"], 'workernames' : ['libc-arm32-qemu-debian'], diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index 99d9f159d..4f1a6ef13 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -290,7 +290,6 @@ def getReporters(): builders = [ "libc-aarch64-ubuntu-dbg", "libc-aarch64-ubuntu-fullbuild-dbg", - "libc-arm32-debian-dbg", "libc-arm32-qemu-debian-dbg", "libc-riscv64-debian-dbg", "libc-riscv64-debian-fullbuild-dbg", diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 284ec3bd3..dec8d66a1 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -161,9 +161,6 @@ def get_all(): # Windows x86_64 32 CPUs, 125 GB RAM create_worker("libc-x86_64-windows", properties={'jobs': 32}, max_builds=2), - # Debian arm32 single core, 512 MB RAM backed by 32 GB swap memory - create_worker("libc-arm32-debian", properties={'jobs': 1}, max_builds=1), - # Debian x86_64 AMD Rome 16 CPUs, 64 GB RAM 
create_worker("libc-arm32-qemu-debian", properties={'jobs': 16}, max_builds=1), From ba27c4f8bf01ff2883c2b9228b5739a668457408 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 24 Jul 2025 11:03:16 +0100 Subject: [PATCH 007/135] Merge clang-armv7-lnt into clang-armv7-2stage (#520) This removes the single stage test suite bot, then adds stage 1 check and final test suite run to the 2 stage bot. Which is the equivalent but 1 fewer worker. It'll slow down the 2 stage builds a bit but there's not a lot of activity in the 32-bit Arm area anyway, so I don't think it's a problem. --- buildbot/osuosl/master/config/builders.py | 21 +++++---------------- buildbot/osuosl/master/config/workers.py | 1 - 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index dae5da4a1..bdec055d3 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -344,21 +344,6 @@ checkout_lld=False, extra_cmake_args=["-DLLVM_TARGETS_TO_BUILD='ARM'"])}, - # ARMv7 LNT test-suite in test-only mode - {'name' : "clang-armv7-lnt", - 'tags' : ["clang"], - 'workernames' : ["linaro-clang-armv7-lnt"], - 'builddir': "clang-armv7-lnt", - 'factory' : ClangBuilder.getClangCMakeBuildFactory( - clean=False, - checkout_compiler_rt=False, - checkout_lld=False, - checks=[], - runTestSuite=True, - testsuite_flags=[ - '--cppflags', '-mcpu=cortex-a15 -marm', - '--threads=32', '--build-threads=32'])}, - ## ARMv7 check-all 2-stage {'name' : "clang-armv7-2stage", 'tags' : ["clang"], @@ -369,7 +354,11 @@ checkout_compiler_rt=False, checkout_lld=False, useTwoStage=True, - testStage1=False, + testStage1=True, + runTestSuite=True, + testsuite_flags=[ + '--cppflags', '-mcpu=cortex-a15 -marm', + '--threads=32', '--build-threads=32'], extra_cmake_args=[ "-DCMAKE_C_FLAGS='-mcpu=cortex-a15 -marm'", "-DCMAKE_CXX_FLAGS='-mcpu=cortex-a15 -marm'"])}, diff --git 
a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index dec8d66a1..933b295c2 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -16,7 +16,6 @@ def get_all(): create_worker("as-worker-4", properties={'jobs' : 24}, max_builds=2), # ARMv7/ARMv8 Linaro workers - create_worker("linaro-clang-armv7-lnt", max_builds=1), create_worker("linaro-clang-armv7-2stage", max_builds=1), create_worker("linaro-clang-armv7-global-isel", max_builds=1), create_worker("linaro-clang-armv7-vfpv3-2stage", max_builds=1), From 933488343484b8679ee3db8ee5af7e9df721a696 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 24 Jul 2025 11:04:15 +0100 Subject: [PATCH 008/135] Merge clang-aarch64-full-2stage with clang-aarch64-lld-2stage (#521) And keep the latter. Since we (Linaro) don't commit to testing ld specifically, and recommend lld anyway. The ld bot had some extra testing that I've enabled for the lld version in this change. --- buildbot/osuosl/master/config/builders.py | 43 +++++++---------------- buildbot/osuosl/master/config/workers.py | 1 - 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index bdec055d3..e7547561f 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -403,22 +403,33 @@ checkout_lld=False, extra_cmake_args=["-DLLVM_TARGETS_TO_BUILD='AArch64'"])}, - ## AArch64 check-all + LLD + test-suite 2-stage + # AArch64 2 stage build with lld, flang, compiler-rt, test-suite and SVE/SME + # mlir integration tests. 
{'name' : "clang-aarch64-lld-2stage", 'tags' : ["lld"], 'workernames' : ["linaro-clang-aarch64-lld-2stage"], 'builddir':"clang-aarch64-lld-2stage", 'factory' : ClangBuilder.getClangCMakeBuildFactory( clean=True, + checkout_flang=True, + checkout_lld=True, useTwoStage=True, runTestSuite=True, + env={ + 'NO_STOP_MESSAGE':'1', # For Fortran test-suite + }, testsuite_flags=[ '--cppflags', '-mcpu=neoverse-n1 -fuse-ld=lld', '--threads=32', '--build-threads=32'], extra_cmake_args=[ "-DCMAKE_C_FLAGS='-mcpu=neoverse-n1'", "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-n1'", - "-DLLVM_ENABLE_LLD=True"])}, + "-DLLVM_ENABLE_LLD=True", + "-DLLVM_LIT_ARGS='-v'", + "-DMLIR_INCLUDE_INTEGRATION_TESTS=True", + "-DMLIR_RUN_ARM_SVE_TESTS=True", + "-DMLIR_RUN_ARM_SME_TESTS=True", + "-DARM_EMULATOR_EXECUTABLE=qemu-aarch64"])}, ## AArch64 run test-suite at -O0 (GlobalISel is now default). {'name' : "clang-aarch64-global-isel", @@ -455,34 +466,6 @@ # lld tests cause us to hit thread limits "-DLLVM_ENABLE_THREADS=OFF"])}, - # AArch64 check-all + flang + compiler-rt + test-suite + SVE/SME - # mlir-integration-tests 2-stage - {'name' : "clang-aarch64-full-2stage", - 'tags' : ["clang"], - 'workernames' : ["linaro-clang-aarch64-full-2stage"], - 'builddir': "clang-aarch64-full-2stage", - 'factory' : ClangBuilder.getClangCMakeBuildFactory( - clean=True, - checkout_flang=True, - checkout_lld=True, - useTwoStage=True, - testStage1=False, - runTestSuite=True, - env={ - 'NO_STOP_MESSAGE':'1', # For Fortran test-suite - }, - testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-n1', - '--threads=32', '--build-threads=32'], - extra_cmake_args=[ - "-DCMAKE_C_FLAGS='-mcpu=neoverse-n1'", - "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-n1'", - "-DLLVM_LIT_ARGS='-v'", - "-DMLIR_INCLUDE_INTEGRATION_TESTS=True", - "-DMLIR_RUN_ARM_SVE_TESTS=True", - "-DMLIR_RUN_ARM_SME_TESTS=True", - "-DARM_EMULATOR_EXECUTABLE=qemu-aarch64"])}, - # All SVE (as opposed to SVE2) builders are using optimisation flags # for Graviton 3 "balanced" from # 
https://github.com/aws/aws-graviton-getting-started/blob/main/c-c++.md. diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 933b295c2..01ede0558 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -27,7 +27,6 @@ def get_all(): create_worker("linaro-clang-aarch64-quick", max_builds=1), create_worker("linaro-clang-aarch64-lld-2stage", max_builds=1), create_worker("linaro-clang-aarch64-global-isel", max_builds=1), - create_worker("linaro-clang-aarch64-full-2stage", max_builds=1), create_worker("linaro-lldb-aarch64-ubuntu", max_builds=1), create_worker("linaro-flang-aarch64-dylib", max_builds=1), create_worker("linaro-flang-aarch64-sharedlibs", max_builds=1), From 4df1f0b9bf021320ba91f648776e242b19308ea8 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 24 Jul 2025 14:15:20 -0700 Subject: [PATCH 009/135] [CI] Add maintenance window to premerge clusters (#522) This patch adds an explicit maintenance window to the premerge clusters. This is in response to some control plane upgrades that we received notice of that will prevent access to the k8s control plane for ~15 minutes which means we will not be able to start new jobs. We should have this anyways though as the current node upgrade strategy also breaks jobs. --- premerge/gke_cluster/main.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 2294c21e0..ea92a6a3a 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -19,6 +19,16 @@ resource "google_container_cluster" "llvm_premerge" { workload_identity_config { workload_pool = "llvm-premerge-checks.svc.id.goog" } + + # We prefer that maintenance is done on weekends between 02:00 and 05:00 + # UTC when commit traffic is low to avoid interruptions. 
+ maintenance_policy { + recurring_window { + start_time = "2025-07-24T02:00:00Z" + end_time = "2025-07-24T05:00:00Z" + recurrence = "FREQ=WEEKLY;BYDAY=SA,SU" + } + } } resource "google_container_node_pool" "llvm_premerge_linux_service" { From 0bb84c69bc35c997374652338e68aa715fbc239f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 24 Jul 2025 21:21:04 +0000 Subject: [PATCH 010/135] [CI] Bump maintenance window end date This patch bumps the maintenance window end date so we can actually apply the changes. GKE requires that there be at least 48 hours of maintenance time available in >=4h blocks for every 32 day segment. The original rules did not meet this. These changes allow us to deploy and should not sacrifice too much in terms of ensuring we minimize commit traffic during the maintenance periods. --- premerge/gke_cluster/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index ea92a6a3a..846d8b92e 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -25,7 +25,7 @@ resource "google_container_cluster" "llvm_premerge" { maintenance_policy { recurring_window { start_time = "2025-07-24T02:00:00Z" - end_time = "2025-07-24T05:00:00Z" + end_time = "2025-07-24T08:00:00Z" recurrence = "FREQ=WEEKLY;BYDAY=SA,SU" } } From e58068fb2798a9c8f90d8d651dd14f79f627b049 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 24 Jul 2025 16:00:12 -0700 Subject: [PATCH 011/135] [CI] Attach pod disruption budgets to runner pods (#523) This patch adds some pod disruption budgets to runner pods that just sets the minimum number of available pods to the maximum. This ensure that the number of pods that k8s calculates can be disrupted is zero. This means that when GKE is updating the node pool, it must wait an hour before forcibly evicting the pod, giving it time to finish. 
Before this, when GKE wanted to upgrade a node, it would forcibly evict the pod very quickly (theoretically after the grace period which has a default of 30s) not realizing it is stateful. --- premerge/pod_disruption_budget.yaml | 10 ++++++++++ premerge/premerge_resources/main.tf | 30 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 premerge/pod_disruption_budget.yaml diff --git a/premerge/pod_disruption_budget.yaml b/premerge/pod_disruption_budget.yaml new file mode 100644 index 000000000..79bcaa623 --- /dev/null +++ b/premerge/pod_disruption_budget.yaml @@ -0,0 +1,10 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: runner-set-pdb + namespace: ${ runner_set_name } +spec: + minAvailable: ${ min_pod_count } + selector: + matchLabels: + actions.github.com/scale-set-name: ${ runner_set_name } diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 10b020fa6..546880f35 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -258,6 +258,36 @@ resource "kubernetes_service_account" "windows_2022_object_cache_ksa" { depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_runners] } +# We set up pod disruption budgets here. We need one per namespace and we need +# to set the min pod count to the maximum number of runner pods that can +# possibly exist so we never have a number of disruptible pods greater than +# zero. 
+ +resource "kubernetes_manifest" "linux_runners_disruption_budget" { + manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-linux-runners", min_pod_count : 16 })) + depends_on = [kubernetes_namespace.llvm_premerge_linux_runners] +} + +resource "kubernetes_manifest" "windows_2022_runners_disruption_budget" { + manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-windows-2022-runners", min_pod_count : 16 })) + depends_on = [kubernetes_namespace.llvm_premerge_linux_runners] +} + +resource "kubernetes_manifest" "libcxx_runners_disruption_budget" { + manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-libcxx-runners", min_pod_count : 32 })) + depends_on = [kubernetes_namespace.llvm_premerge_linux_runners] +} + +resource "kubernetes_manifest" "libcxx_release_runners_disruption_budget" { + manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-libcxx-release-runners", min_pod_count : 32 })) + depends_on = [kubernetes_namespace.llvm_premerge_linux_runners] +} + +resource "kubernetes_manifest" "libcxx_next_runners_disruption_budget" { + manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-libcxx-next-runners", min_pod_count : 32 })) + depends_on = [kubernetes_namespace.llvm_premerge_linux_runners] +} + resource "kubernetes_namespace" "grafana" { metadata { name = "grafana" From c0be49c3b8719dc82c9772fa4631dd657de9a02a Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Fri, 25 Jul 2025 12:04:32 -0400 Subject: [PATCH 012/135] [CI] Increase resource allocation for operational metrics cronjob (#524) The cronjob for scraping llvm-project commit data consistently fails due to running out of memory. 
`git clone https://github.com/llvm/llvm-project.git` consistently uses around 1.5 GiB of RAM, which is well above both the request and limit currently set for the cronjob. Thus, the job is repeatedly killed for using more memory than is allocated. Increasing the resource allocation should get this cronjob running as intended. --- premerge/operational_metrics_cronjob.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/premerge/operational_metrics_cronjob.yaml b/premerge/operational_metrics_cronjob.yaml index 7c9630147..ce09f4ad3 100644 --- a/premerge/operational_metrics_cronjob.yaml +++ b/premerge/operational_metrics_cronjob.yaml @@ -38,8 +38,8 @@ spec: resources: requests: cpu: "250m" - memory: "256Mi" + memory: "1.75Gi" limits: - cpu: "1" - memory: "512Mi" + cpu: "2" + memory: "2Gi" restartPolicy: OnFailure From ca2b6755f282a2e81f582609aa07744e1823eb64 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 25 Jul 2025 16:56:37 +0000 Subject: [PATCH 013/135] [CI] Fix Terraform Reconciliation Issue around Units This patch updates the operational metrics Cron job to use Mi for the memory requests rather than Gi as k8s will automatically convert them to Mi for decimal number of Gi which terraform then thinks differs from what it tried to apply. This leads to terraform trying to reapply the setting every time one runs terraform apply despite the settings already being equivalent. 
--- premerge/operational_metrics_cronjob.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/premerge/operational_metrics_cronjob.yaml b/premerge/operational_metrics_cronjob.yaml index ce09f4ad3..e2a0b965c 100644 --- a/premerge/operational_metrics_cronjob.yaml +++ b/premerge/operational_metrics_cronjob.yaml @@ -38,7 +38,10 @@ spec: resources: requests: cpu: "250m" - memory: "1.75Gi" + # We explicitly use Mi here instead of a decimal number of + # Gi because k8s will automatically convert to Mi which + # terraform then thinks differs from what it intended to apply. + memory: "1792Mi" limits: cpu: "2" memory: "2Gi" From e293fcc3d7db30f8e0431a822d67296ca57672da Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Fri, 25 Jul 2025 13:45:54 -0700 Subject: [PATCH 014/135] Update libc arm32 cmake flags to specify the sysroot --- zorg/buildbot/builders/annotated/libc-linux.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/libc-linux.py b/zorg/buildbot/builders/annotated/libc-linux.py index 0349a3918..87171a88a 100644 --- a/zorg/buildbot/builders/annotated/libc-linux.py +++ b/zorg/buildbot/builders/annotated/libc-linux.py @@ -121,7 +121,12 @@ def main(argv): if arm32_build and qemu_build: cmake_args.append('-DLIBC_TARGET_TRIPLE=arm-linux-gnueabihf') - cmake_args.append('-DLIBC_TEST_COMPILE_OPTIONS_DEFAULT=-static') + cmake_args.append('-DCMAKE_SYSROOT=/opt/sysroot-deb-armhf-stable') + cmake_args.append('-DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf') + cmake_args.append('-DCMAKE_CXX_COMPILER_TARGET=arm-linux-gnueabihf') + cmake_args.append('-DCMAKE_AR=/usr/bin/llvm-ar-20') + cmake_args.append('-DCMAKE_RANLIB=/usr/bin/llvm-ranlib-20') + cmake_args.append('-DLIBC_UNITTEST_ENV=QEMU_LD_PREFIX=/opt/sysroot-deb-armhf-stable') if bootstrap_build: cmake_root = 'llvm' From b4e60bcc86ef55315f98bf1d22d0ae9434e25ecc Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Fri, 25 Jul 2025 21:05:58 -0400 Subject: 
[PATCH 015/135] [CI] Validate scraped push commits via GitHub API (#514) As GitHubArchive BigQuery is known to be lossy, it is likely that we currently overestimate the number of commits made to llvm/llvm-project without an associated pull request. To remedy this, we can make calls to the [GitHub Event API](https://docs.github.com/en/rest/activity/events). While we want to avoid using the API to get information regarding every single commit made to LLVM, we can narrow our calls down to only commits that don't have any pull request data available via BigQuery. From those calls, we can determine if a "push" commit actually does have a pull request and, if it does, whether or not it has been approved. --- .../ops-container/process_llvm_commits.py | 108 +++++++++++++++++- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/llvm-ops-metrics/ops-container/process_llvm_commits.py index fdf20cc91..5e5b04770 100644 --- a/llvm-ops-metrics/ops-container/process_llvm_commits.py +++ b/llvm-ops-metrics/ops-container/process_llvm_commits.py @@ -9,8 +9,13 @@ GRAFANA_URL = ( "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write" ) +GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql" REPOSITORY_URL = "https://github.com/llvm/llvm-project.git" +# How many commits to query the GitHub GraphQL API for at a time. +# Querying too many commits at once often leads to the call failing. +GITHUB_API_BATCH_SIZE = 75 + # Number of days to look back for new commits # We allow some buffer time between when a commit is made and when it is queried # for reviews. This is allow time for any events to propogate in the GitHub @@ -44,6 +49,23 @@ AND JSON_VALUE(pr_event.payload, '$.pull_request.merge_commit_sha') IS NOT NULL """ +# Template GraphQL subquery to check if a commit has an associated pull request +# and whether that pull request has been reviewed and approved. 
+COMMIT_GRAPHQL_SUBQUERY_TEMPLATE = """ +commit_{commit_sha}: + object(oid:"{commit_sha}") {{ + ... on Commit {{ + associatedPullRequests(first: 1) {{ + totalCount + pullRequest: nodes {{ + number + reviewDecision + }} + }} + }} + }} +""" + @dataclasses.dataclass class LLVMCommitInfo: @@ -153,6 +175,85 @@ def query_for_reviews( return list(new_commits.values()) +def validate_push_commits( + new_commits: list[LLVMCommitInfo], github_token: str +) -> None: + """Validate that push commits don't have a pull request. + + To address lossiness of data from GitHub Archive BigQuery, we check each + commit to see if it actually has an associated pull request. + + Args: + new_commits: List of commits to validate. + github_token: The access token to use with the GitHub GraphQL API. + """ + + # Get all push commits from new commits and form their subqueries + commit_subqueries = [] + potential_push_commits = {} + for commit in new_commits: + if commit.has_pull_request: + continue + potential_push_commits[commit.commit_sha] = commit + commit_subqueries.append( + COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit.commit_sha) + ) + logging.info("Found %d potential push commits", len(potential_push_commits)) + + # Query GitHub GraphQL API for pull requests associated with push commits + # We query in batches as large queries often fail + api_commit_data = {} + query_template = """ + query { + repository(owner:"llvm", name:"llvm-project"){ + %s + } + } + """ + num_batches = len(commit_subqueries) // GITHUB_API_BATCH_SIZE + 1 + logging.info("Querying GitHub GraphQL API in %d batches", num_batches) + for i in range(num_batches): + subquery_batch = commit_subqueries[ + i * GITHUB_API_BATCH_SIZE : (i + 1) * GITHUB_API_BATCH_SIZE + ] + query = query_template % "".join(subquery_batch) + + logging.info( + "Querying batch %d of %d (%d commits)", + i + 1, + num_batches, + len(subquery_batch), + ) + response = requests.post( + url=GITHUB_GRAPHQL_API_URL, + headers={ + "Authorization": 
f"bearer {github_token}", + }, + json={"query": query}, + ) + if response.status_code < 200 or response.status_code >= 300: + logging.error("Failed to query GitHub GraphQL API: %s", response.text) + api_commit_data.update(response.json()["data"]["repository"]) + + amend_count = 0 + for commit_sha, data in api_commit_data.items(): + # Verify that push commit has no pull requests + commit_sha = commit_sha.removeprefix("commit_") + if data["associatedPullRequests"]["totalCount"] == 0: + continue + + # Amend fields with new data from API + pull_request = data["associatedPullRequests"]["pullRequest"][0] + commit_info = potential_push_commits[commit_sha] + commit_info.has_pull_request = True + commit_info.pr_number = pull_request["number"] + commit_info.is_reviewed = pull_request["reviewDecision"] is not None + commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED" + amend_count += 1 + + logging.info("Amended %d commits", amend_count) + + def upload_daily_metrics( grafana_api_key: str, grafana_metrics_userid: str, @@ -164,9 +265,6 @@ def upload_daily_metrics( grafana_api_key: The key to make API requests with. grafana_metrics_userid: The user to make API requests with. new_commits: List of commits to process & upload to Grafana. 
- - Returns: - None """ # Count each type of commit made approval_count = 0 @@ -200,6 +298,7 @@ def upload_daily_metrics( def main() -> None: + github_token = os.environ["GITHUB_TOKEN"] grafana_api_key = os.environ["GRAFANA_API_KEY"] grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"] @@ -219,6 +318,9 @@ def main() -> None: logging.info("Querying for reviews of new commits.") new_commit_info = query_for_reviews(new_commits, date_to_scrape) + logging.info("Validating push commits.") + validate_push_commits(new_commit_info, github_token) + logging.info("Uploading metrics to Grafana.") upload_daily_metrics(grafana_api_key, grafana_metrics_userid, new_commit_info) From 62e64adfe2ef4e9902d00156b5e20dc7bb856b42 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Mon, 28 Jul 2025 10:44:44 +0200 Subject: [PATCH 016/135] Increase timeout for clang-sparc64-linux to 1800 seconds (#513) The builds for clang-sparc64-linux currently fail with a timeout during the linking stage, so lets increase the timeout to 1800 seconds which is the timeout also used on the builder clang-solaris11-sparcv9. --- buildbot/osuosl/master/config/builders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index e7547561f..d44dd4857 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -818,6 +818,7 @@ 'builddir': 'clang-sparc64-linux', 'factory' : ClangBuilder.getClangCMakeBuildFactory( clean=False, + timeout=1800, runTestSuite=True, checkout_clang_tools_extra=False, checkout_compiler_rt=False, From 1c61704cb957159f9608b9cea2e30f53110a6781 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 29 Jul 2025 21:35:59 +0000 Subject: [PATCH 017/135] [CI] Minor fixes in operations metrics container build 1. Do not restrict pull request branch to allow hacking on in stacked PRs. 2. Remove some extra new lines at the bottom. 3. 
Rename the push job to better reflect the current context rather than where the job was copied from. --- .github/workflows/build-operations-metrics-container.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/build-operations-metrics-container.yml b/.github/workflows/build-operations-metrics-container.yml index c52d82db0..c938f373f 100644 --- a/.github/workflows/build-operations-metrics-container.yml +++ b/.github/workflows/build-operations-metrics-container.yml @@ -11,8 +11,6 @@ on: - .github/workflows/build-operations-metrics-container.yml - 'llvm-ops-metrics/ops-container/**' pull_request: - branches: - - main paths: - .github/workflows/build-operations-metrics-container.yml - 'llvm-ops-metrics/ops-container/**' @@ -55,7 +53,7 @@ jobs: path: ${{ steps.vars.outputs.container-filename }} retention-days: 14 - push-metrics-container: + push-operations-metrics-container: if: github.event_name == 'push' needs: - build-operations-metrics-container @@ -76,4 +74,3 @@ jobs: podman login -u ${{ github.actor }} -p $GITHUB_TOKEN ghcr.io podman push ${{ needs.build-operations-metrics-container.outputs.container-name-tag }} podman push ${{ needs.build-operations-metrics-container.outputs.container-name }}:latest - From 5de18ac47fbc454d971d1e959f57cc2188e71d3e Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 30 Jul 2025 07:57:47 +0100 Subject: [PATCH 018/135] [RISCV] Remove -force-tail-folding-style=data-with-evl from EVL configs This shouldn't be needed any more. 
--- zorg/buildbot/builders/annotated/rise-riscv-build.sh | 2 +- zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/zorg/buildbot/builders/annotated/rise-riscv-build.sh b/zorg/buildbot/builders/annotated/rise-riscv-build.sh index 4b213ef91..0c1bd54f5 100755 --- a/zorg/buildbot/builders/annotated/rise-riscv-build.sh +++ b/zorg/buildbot/builders/annotated/rise-riscv-build.sh @@ -29,7 +29,7 @@ case "$BUILDBOT_BUILDERNAME" in export BB_QEMU_MEM="64G" ;; "clang-riscv-rva23-evl-vec-2stage") - TARGET_CFLAGS="-march=rva23u64 -mllvm -force-tail-folding-style=data-with-evl -mllvm -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue" + TARGET_CFLAGS="-march=rva23u64 -mllvm -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue" export BB_IMG_DIR=$(pwd)/.. # TODO: Switch to specifying rva23u64 once qemu on the builder is # upgraded to a version that recognises it. diff --git a/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh b/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh index 60e4e5bbc..710a9d000 100755 --- a/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh +++ b/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh @@ -97,7 +97,7 @@ for CONF in rva20 rva22 rva23 rva23-evl rva23-mrvv-vec-bits; do QEMU_CPU=$RVA23_QEMU_CPU ;; rva23-evl) - CFLAGS="-march=rva23u64 -mllvm -force-tail-folding-style=data-with-evl -mllvm -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue" + CFLAGS="-march=rva23u64 -mllvm -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue" QEMU_CPU=$RVA23_QEMU_CPU ;; rva23-mrvv-vec-bits) From 8940ff2e77c28fb655adf1e2b53bd2af4bea096d Mon Sep 17 00:00:00 2001 From: Justice Adams <107649528+justice-adams-apple@users.noreply.github.com> Date: Wed, 30 Jul 2025 14:06:50 -0700 Subject: [PATCH 019/135] Add Stage 1 arm 64 job (#530) Add clang-stage1-RA-as job which runs on arm64 machines --- 
zorg/jenkins/jobs/jobs/clang-stage1-RA-as | 156 ++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 zorg/jenkins/jobs/jobs/clang-stage1-RA-as diff --git a/zorg/jenkins/jobs/jobs/clang-stage1-RA-as b/zorg/jenkins/jobs/jobs/clang-stage1-RA-as new file mode 100644 index 000000000..82f96d87a --- /dev/null +++ b/zorg/jenkins/jobs/jobs/clang-stage1-RA-as @@ -0,0 +1,156 @@ +pipeline { + options { + disableConcurrentBuilds() + } + + parameters { + string(name: 'LABEL', defaultValue: params.LABEL ?: 'macos-arm64', description: 'Node label to run on') + + string(name: 'GIT_REVISION', defaultValue: params.GIT_REVISION ?: '*/main', description: 'Git revision to build') + } + + agent { + node { + label params.LABEL + } + } + + stages { + stage('Checkout') { + steps { + dir('llvm-project') { + checkout([$class: 'GitSCM', branches: [ + [name: params.GIT_REVISION] + ], extensions: [ + [$class: 'CloneOption', + timeout: 30] + ], userRemoteConfigs: [ + [url: 'https://github.com/llvm/llvm-project.git'] + ]]) + } + dir('llvm-zorg') { + checkout([$class: 'GitSCM', branches: [ + [name: '*/main'] + ], userRemoteConfigs: [ + [url: 'https://github.com/llvm/llvm-zorg.git'] + ]]) + } + } + } + stage('Setup Venv') { + environment { + PATH="$PATH:/usr/bin:/usr/local/bin" + } + steps { + sh ''' + # Non-incremental, so always delete just in case. 
+ rm -rf clang-build clang-install host-compiler *.tar.gz + rm -rf venv + python3 -m venv venv + set +u + source ./venv/bin/activate + python -m pip install -r ./llvm-zorg/zorg/jenkins/jobs/requirements.txt + set -u + ''' + } + } + stage('Build') { + environment { + PATH="$PATH:/usr/bin:/usr/local/bin" + MACOSX_DEPLOYMENT_TARGET="13.6" + } + steps { + timeout(120) { + withCredentials([string(credentialsId: 's3_resource_bucket', variable: 'S3_BUCKET')]) { + sh ''' + set -u + rm -rf build.properties + + source ./venv/bin/activate + + cd llvm-project + git tag -a -m "First Commit" first_commit 97724f18c79c7cc81ced24239eb5e883bf1398ef || true + + git_desc=$(git describe --match "first_commit") + export GIT_DISTANCE=$(echo ${git_desc} | cut -f 2 -d "-") + + sha=$(echo ${git_desc} | cut -f 3 -d "-") + export GIT_SHA=${sha:1} + + # Also save the LLVM_REV until LNT server is taught about GIT + export LLVM_REV=$(git show -q | grep "llvm-svn:" | cut -f2 -d":" | tr -d " ") + + cd - + + echo "GIT_DISTANCE=$GIT_DISTANCE" > build.properties + echo "GIT_SHA=$GIT_SHA" >> build.properties + echo "ARTIFACT=$JOB_NAME/clang-d$GIT_DISTANCE-g$GIT_SHA-t$BUILD_ID-b$BUILD_NUMBER.tar.gz" >> build.properties + + rm -rf clang-build clang-install *.tar.gz + python llvm-zorg/zorg/jenkins/monorepo_build.py cmake build \ + --assertions --cmake-type=RelWithDebInfo \ + --projects="clang;clang-tools-extra;compiler-rt" \ + --cmake-flag="-DPython3_EXECUTABLE=$(which python)" \ + --cmake-flag="-DLLVM_TARGETS_TO_BUILD=AArch64" + ''' + } + } + } + } + stage('Test') { + environment { + PATH="$PATH:/usr/bin:/usr/local/bin" + } + steps { + timeout(120) { + sh ''' + set -u + source ./venv/bin/activate + python llvm-zorg/zorg/jenkins/monorepo_build.py cmake testlong + ''' + } + } + post { + always { + script { + junit "clang-build/**/testresults.xunit.xml" + } + } + } + } + } + post { + always { + script { + // ToDo: Restore the issue scanner + // scanForIssues tool: clang() + sh "rm -rf clang-build 
clang-install host-compiler" + } + } + // This is commented out because we don't have downstream arm64 jobs setup yet, we will + // in the future + //success { + // script { + // if (!params.SKIP_TRIGGER) { + // // Trigger Stage 2 Jobs + // build job: 'clang-stage2-cmake-RgSan_relay-as', wait: false + // build job: 'clang-stage2-Rthinlto_relay-as', wait: false + // build job: 'relay-lnt-ctmark-as', wait: false + // build job: 'relay-test-suite-verify-machineinstrs-as', wait: false + // } + // } + //} + //unstable { + // script { + // if (!params.SKIP_TRIGGER) { + // // Trigger Stage 2 Jobs + // build job: 'clang-stage2-cmake-RgSan_relay-as', wait: false + // build job: 'clang-stage2-Rthinlto_relay-as', wait: false + // build job: 'relay-lnt-ctmark-as', wait: false + // build job: 'relay-test-suite-verify-machineinstrs-as', wait: false + // } + // } + //} + } +} + From 3e7d7f29561d40513103381685a2470290acb18c Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Wed, 30 Jul 2025 17:51:21 -0400 Subject: [PATCH 020/135] [CI] Use GraphQL API instead of BigQuery to get review data (#525) As we are already making calls to the GitHub GraphQL API for data validation, we can just remove the added complexity of using GitHub Archive BigQuery as a data source and query the API directly. Using BigQuery has the advantage of not being rate-limited, but we often have to query for 50-70 commits via the API anyway due to missing records of events in GitHub Archive. With more than half of the BigQuery data points needing amending, it makes more sense to use the API as the original data source. 
--- .../ops-container/process_llvm_commits.py | 119 ++---------------- premerge/main.tf | 11 -- 2 files changed, 12 insertions(+), 118 deletions(-) diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/llvm-ops-metrics/ops-container/process_llvm_commits.py index 5e5b04770..81834d63f 100644 --- a/llvm-ops-metrics/ops-container/process_llvm_commits.py +++ b/llvm-ops-metrics/ops-container/process_llvm_commits.py @@ -3,7 +3,6 @@ import logging import os import git -from google.cloud import bigquery import requests GRAFANA_URL = ( @@ -14,7 +13,7 @@ # How many commits to query the GitHub GraphQL API for at a time. # Querying too many commits at once often leads to the call failing. -GITHUB_API_BATCH_SIZE = 75 +GITHUB_API_BATCH_SIZE = 50 # Number of days to look back for new commits # We allow some buffer time between when a commit is made and when it is queried @@ -22,33 +21,6 @@ # Archive BigQuery tables. LOOKBACK_DAYS = 2 -# Template query to find pull requests associated with commits on a given day. -# Searches for pull requests within a lower and upper bound of Github Archive -# event dates. 
-GITHUB_ARCHIVE_REVIEW_QUERY = """ -WITH PullRequestReviews AS ( - SELECT DISTINCT - JSON_VALUE(payload, '$.pull_request.id') AS pr_id, - JSON_VALUE(payload, '$.review.state') as review_state, - FROM `githubarchive.day.20*` - WHERE - repo.id = 75821432 - AND `type` = 'PullRequestReviewEvent' - AND (_TABLE_SUFFIX BETWEEN '{lower_review_bound}' AND '{upper_review_bound}') -) -SELECT DISTINCT - JSON_VALUE(pr_event.payload, '$.pull_request.merge_commit_sha') AS merge_commit_sha, - JSON_VALUE(pr_event.payload, '$.pull_request.number') AS pull_request_number, - pr_review.review_state as review_state -FROM `githubarchive.day.{commit_date}` AS pr_event -LEFT JOIN PullRequestReviews as pr_review ON - JSON_VALUE(pr_event.payload, '$.pull_request.id') = pr_review.pr_id # PR ID should match the review events -WHERE - pr_event.repo.id = 75821432 - AND pr_event.`type` = 'PullRequestEvent' - AND JSON_VALUE(pr_event.payload, '$.pull_request.merge_commit_sha') IS NOT NULL -""" - # Template GraphQL subquery to check if a commit has an associated pull request # and whether that pull request has been reviewed and approved. COMMIT_GRAPHQL_SUBQUERY_TEMPLATE = """ @@ -113,26 +85,17 @@ def scrape_new_commits_by_date( def query_for_reviews( - new_commits: list[git.Commit], commit_datetime: datetime.datetime + new_commits: list[git.Commit], github_token: str ) -> list[LLVMCommitInfo]: - """Query GitHub Archive BigQuery for reviews of new commits. + """Query GitHub GraphQL API for reviews of new commits. Args: new_commits: List of new commits to query for reviews. - commit_datetime: The date that the new commits were made on. + github_token: The access token to use with the GitHub GraphQL API. Returns: List of LLVMCommitInfo objects for each commit's review information. 
""" - - # Search for reviews in the last 4 weeks - earliest_review_date = ( - commit_datetime - datetime.timedelta(weeks=4) - ).strftime("%Y%m%d") - latest_review_date = datetime.datetime.now(datetime.timezone.utc).strftime( - "%Y%m%d" - ) - # Create a map of commit sha to info new_commits = { commit.hexsha: LLVMCommitInfo( @@ -141,67 +104,13 @@ def query_for_reviews( for commit in new_commits } - # Query each relevant daily GitHub Archive table - query = GITHUB_ARCHIVE_REVIEW_QUERY.format( - commit_date=commit_datetime.strftime("%Y%m%d"), - lower_review_bound=earliest_review_date.removeprefix("20"), - upper_review_bound=latest_review_date.removeprefix("20"), - ) - bq_client = bigquery.Client() - query_job = bq_client.query(query) - results = query_job.result() - - # Process each found merge commit - for row in results: - # If this commit is irrelevant, skip it - # Not every merge_commit_sha makes it into main, a "merge commit" can mean - # different things depending on the state of the pull request. - # docs.github.com/en/rest/pulls/pulls#get-a-pull-request for more details. - merge_commit_sha = row["merge_commit_sha"] - if merge_commit_sha not in new_commits: - continue - - commit_info = new_commits[merge_commit_sha] - commit_info.has_pull_request = True - commit_info.pr_number = row["pull_request_number"] - commit_info.is_reviewed = row["review_state"] is not None - commit_info.is_approved = row["review_state"] == "approved" - - logging.info( - "Total gigabytes processed: %d GB", - query_job.total_bytes_processed / (1024**3), - ) - - return list(new_commits.values()) - - -def validate_push_commits( - new_commits: list[LLVMCommitInfo], github_token: str -) -> None: - """Validate that push commits don't have a pull request. - - To address lossiness of data from GitHub Archive BigQuery, we check each - commit to see if it actually has an associated pull request. - - Args: - new_commits: List of commits to validate. 
- github_token: The access token to use with the GitHub GraphQL API. - """ - - # Get all push commits from new commits and form their subqueries + # Create GraphQL subqueries for each commit commit_subqueries = [] - potential_push_commits = {} - for commit in new_commits: - if commit.has_pull_request: - continue - potential_push_commits[commit.commit_sha] = commit + for commit_sha in new_commits: commit_subqueries.append( - COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit.commit_sha) + COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha) ) - logging.info("Found %d potential push commits", len(potential_push_commits)) - # Query GitHub GraphQL API for pull requests associated with push commits - # We query in batches as large queries often fail api_commit_data = {} query_template = """ query { @@ -235,23 +144,22 @@ def validate_push_commits( logging.error("Failed to query GitHub GraphQL API: %s", response.text) api_commit_data.update(response.json()["data"]["repository"]) - amend_count = 0 for commit_sha, data in api_commit_data.items(): # Verify that push commit has no pull requests commit_sha = commit_sha.removeprefix("commit_") + + # If commit has no pull requests, skip it. No data to update. 
if data["associatedPullRequests"]["totalCount"] == 0: continue - # Amend fields with new data from API pull_request = data["associatedPullRequests"]["pullRequest"][0] - commit_info = potential_push_commits[commit_sha] + commit_info = new_commits[commit_sha] commit_info.has_pull_request = True commit_info.pr_number = pull_request["number"] commit_info.is_reviewed = pull_request["reviewDecision"] is not None commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED" - amend_count += 1 - logging.info("Amended %d commits", amend_count) + return list(new_commits.values()) def upload_daily_metrics( @@ -316,10 +224,7 @@ def main() -> None: return logging.info("Querying for reviews of new commits.") - new_commit_info = query_for_reviews(new_commits, date_to_scrape) - - logging.info("Validating push commits.") - validate_push_commits(new_commit_info, github_token) + new_commit_info = query_for_reviews(new_commits, github_token) logging.info("Uploading metrics to Grafana.") upload_daily_metrics(grafana_api_key, grafana_metrics_userid, new_commit_info) diff --git a/premerge/main.tf b/premerge/main.tf index b3bf4afc7..ec16e267f 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -231,17 +231,6 @@ resource "google_service_account" "operational_metrics_gsa" { display_name = "Operational Metrics GSA" } -resource "google_project_iam_binding" "bigquery_jobuser_binding" { - project = google_service_account.operational_metrics_gsa.project - role = "roles/bigquery.jobUser" - - members = [ - "serviceAccount:${google_service_account.operational_metrics_gsa.email}", - ] - - depends_on = [google_service_account.operational_metrics_gsa] -} - resource "kubernetes_namespace" "operational_metrics" { metadata { name = "operational-metrics" From f6e7cdbdd99a24fbb880823300b5a1e8125e830a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 31 Jul 2025 08:55:11 -0700 Subject: [PATCH 021/135] [CI] Add dispatch_job script This patch adds the dispatch_job python script. 
This script is designed to be invoked from within a llvm-zorg AnnotatedBuilder and spawn jobs inside of kubernetes pods on the premerge cluster. This is not directly integrated into an AnnotatedBuilder given the additional dependencies that we have on the kubernetes client API. Reviewers: lnihlen, dschuff, Keenuts, gburgessiv, cmtice Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/526 --- premerge/buildbot/dispatch_job.py | 179 ++++++++++++++++++++++++ premerge/buildbot/dispatch_job_test.py | 86 ++++++++++++ premerge/buildbot/requirements.lock.txt | 54 +++++++ premerge/buildbot/requirements.txt | 1 + 4 files changed, 320 insertions(+) create mode 100644 premerge/buildbot/dispatch_job.py create mode 100644 premerge/buildbot/dispatch_job_test.py create mode 100644 premerge/buildbot/requirements.lock.txt create mode 100644 premerge/buildbot/requirements.txt diff --git a/premerge/buildbot/dispatch_job.py b/premerge/buildbot/dispatch_job.py new file mode 100644 index 000000000..31fd4b152 --- /dev/null +++ b/premerge/buildbot/dispatch_job.py @@ -0,0 +1,179 @@ +"""Dispatches a job to the k8s cluster. + +This script takes in a commit SHA to test along with the platform, spawns a job +to test it, and then streams the logs from the job. We read logs from the job +every so often using the kubernetes logging API rather than directly executing +commands inside the container and streaming the output. This is to work +around https://github.com/kubernetes-sigs/apiserver-network-proxy/issues/748. +""" + +import sys +import logging +import time +import dateutil +import datetime +import json + +import kubernetes + +PLATFORM_TO_NAMESPACE = {"Linux": "llvm-premerge-linux-buildbot"} +LOG_SECONDS_TO_QUERY = 10 +SECONDS_QUERY_LOGS_EVERY = 5 + + +def start_build_linux(commit_sha: str, k8s_client) -> str: + """Spawns a pod to build/test LLVM at the specified SHA. + + Args: + commit_sha: The commit SHA to build/run the tests at.
+ k8s_client: The kubernetes client instance to use for spawning the pod. + + Returns: + A string containing the name of the pod. + """ + pod_name = f"build-{commit_sha}" + commands = [ + "git clone --depth 100 https://github.com/llvm/llvm-project", + "cd llvm-project", + f"git checkout ${commit_sha}", + "export CC=clang", + "export CXX=clang++", + './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' + "echo BUILD FINISHED", + ] + pod_definition = { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "name": pod_name, + "namespace": PLATFORM_TO_NAMESPACE["Linux"], + }, + "spec": { + "containers": [ + { + "name": "build", + "image": "ghcr.io/llvm/ci-ubuntu-24.04", + "command": ["/bin/bash", "-c", ";".join(commands)], + } + ], + "restartPolicy": "Never", + }, + } + kubernetes.utils.create_from_dict(k8s_client, pod_definition) + return pod_name + + +def read_logs(pod_name: str, namespace: str, v1_api) -> list[str]: + """Reads logs from the specified pod. + + Reads logs using the k8s API and returns a nicely formatted list of + strings. + + Args: + pod_name: The name of the pod to read logs from. + namespace: The namespace the pod is in. + v1_api: The kubernetes API instance to use for querying logs. + + Returns: + A list of strings representing the log lines. + """ + logs = v1_api.read_namespaced_pod_log( + name=pod_name, + namespace=namespace, + timestamps=True, + since_seconds=LOG_SECONDS_TO_QUERY, + ) + return logs.split("\n")[:-1] + + +def get_logs_to_print( + logs: list[str], latest_time: datetime.datetime +) -> tuple[datetime.datetime, list[str]]: + """Get the logs that we should be printing. 
+ + This function takes in a raw list of logs along with the timestamp of the + last log line to be printed and returns the new log lines that should be + printed. + + Args: + logs: The raw list of log lines. + latest_time: The timestamp from the last log line that was printed. + + Returns: + A tuple containing the timestamp of the last log line returned and a list + of strings containing the log lines that should be printed. + """ + first_new_index = 0 + time_stamp = latest_time + for log_line in logs: + time_stamp_str = log_line.split(" ")[0] + time_stamp = dateutil.parser.parse(time_stamp_str[:-1]) + if time_stamp > latest_time: + break + first_new_index += 1 + last_time_stamp = latest_time + if logs: + last_time_stamp_str = logs[-1].split(" ")[0] + last_time_stamp = dateutil.parser.parse(last_time_stamp_str[:-1]) + return (last_time_stamp, logs[first_new_index:]) + + +def print_logs( + pod_name: str, namespace: str, v1_api, lastest_time: datetime.datetime +) -> tuple[bool, datetime.datetime]: + """Queries the pod and prints the relevant log lines. + + Args: + pod_name: The pod to print the logs for. + namespace: The namespace the pod is in. + v1_api: The kubernetes API client instance to use for querying the logs. + latest_time: The timestamp of the last log line to be printed. + + Returns: + A tuple containing a boolean representing whether or not the pod has + finished executing and the timestamp of the last log line printed.
+ """ + logs = read_logs(pod_name, namespace, v1_api) + new_time_stamp, logs_to_print = get_logs_to_print(logs, lastest_time) + pod_finished = False + for log_line in logs_to_print: + print(log_line.split("\r")[-1]) + if "BUILD FINISHED" in log_line: + pod_finished = True + + return (pod_finished, new_time_stamp) + + +def main(commit_sha: str, platform: str): + kubernetes.config.load_kube_config() + k8s_client = kubernetes.client.ApiClient() + if platform == "Linux": + pod_name = start_build_linux(commit_sha, k8s_client) + else: + raise ValueError("Unrecognized platform.") + namespace = PLATFORM_TO_NAMESPACE[platform] + latest_time = datetime.datetime.min + v1_api = kubernetes.client.CoreV1Api() + while True: + try: + pod_finished, latest_time = print_logs( + pod_name, namespace, v1_api, latest_time + ) + if pod_finished: + break + except kubernetes.client.exceptions.ApiException as log_exception: + if "ContainerCreating" in json.loads(log_exception.body)["message"]: + logging.warning( + "Cannot yet read logs from the pod: waiting for the container to start." 
+ ) + else: + logging.warning(f"Failed to get logs from the pod: {log_exception}") + time.sleep(SECONDS_QUERY_LOGS_EVERY) + v1_api.delete_namespaced_pod(pod_name, namespace) + + +if __name__ == "__main__": + if len(sys.argv) != 3: + logging.fatal("Expected usage is dispatch_job.py {commit SHA} {platform}") + sys.exit(1) + main(sys.argv[1], sys.argv[2]) diff --git a/premerge/buildbot/dispatch_job_test.py b/premerge/buildbot/dispatch_job_test.py new file mode 100644 index 000000000..8a7dc311a --- /dev/null +++ b/premerge/buildbot/dispatch_job_test.py @@ -0,0 +1,86 @@ +"""Tests for the dispatch_job.py script.""" + +import unittest +import datetime +import dateutil + +import dispatch_job + + +class TestDispatchJobs(unittest.TestCase): + def test_get_logs_first_time(self): + """Test we return the correct logs if we have not seen any before.""" + log_lines = [ + "2025-07-29T15:48:00.259595535Z test1", + "2025-07-29T15:48:00.383251277Z test2", + ] + current_timestamp = datetime.datetime.min + latest_timestamp, lines_to_print = dispatch_job.get_logs_to_print( + log_lines, current_timestamp + ) + self.assertSequenceEqual( + lines_to_print, + [ + "2025-07-29T15:48:00.259595535Z test1", + "2025-07-29T15:48:00.383251277Z test2", + ], + ) + self.assertEqual( + latest_timestamp, dateutil.parser.parse("2025-07-29T15:48:00.383251277") + ) + + def test_get_logs_nonoverlapping(self): + """Test we return the correct logs for non-overlapping ranges. + + Test that if the timestamp of the last log that we have printed is + less than the current set returned by kubernetes, we return the correct + lines. 
+ """ + log_lines = [ + "2025-07-29T15:48:01.787177054Z test1", + "2025-07-29T15:48:03.074715108Z test2", + ] + current_timestamp = dateutil.parser.parse("2025-07-29T15:48:00.383251277") + latest_timestamp, lines_to_print = dispatch_job.get_logs_to_print( + log_lines, current_timestamp + ) + self.assertSequenceEqual( + lines_to_print, + [ + "2025-07-29T15:48:01.787177054Z test1", + "2025-07-29T15:48:03.074715108Z test2", + ], + ) + self.assertEqual( + latest_timestamp, dateutil.parser.parse("2025-07-29T15:48:03.074715108") + ) + + def test_get_logs_overlapping(self): + """Test we return the correct logs for overlapping ranges. + + Test that if the last line to be printed is contained within the logs + kubernetes returned, we skip the lines that have already been printed. + """ + log_lines = [ + "2025-07-29T15:48:00.383251277Z test1", + "2025-07-29T15:48:01.787177054Z test2", + "2025-07-29T15:48:03.074715108Z test3", + ] + current_timestamp = dateutil.parser.parse("2025-07-29T15:48:00.383251277") + latest_timestamp, lines_to_print = dispatch_job.get_logs_to_print( + log_lines, current_timestamp + ) + self.assertSequenceEqual( + lines_to_print, + [ + "2025-07-29T15:48:01.787177054Z test2", + "2025-07-29T15:48:03.074715108Z test3", + ], + ) + self.assertEqual( + latest_timestamp, dateutil.parser.parse("2025-07-29T15:48:03.074715108") + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/premerge/buildbot/requirements.lock.txt b/premerge/buildbot/requirements.lock.txt new file mode 100644 index 000000000..6f65d1060 --- /dev/null +++ b/premerge/buildbot/requirements.lock.txt @@ -0,0 +1,54 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --output-file=requirements.lock.txt requirements.txt +# +cachetools==5.5.2 + # via google-auth +certifi==2025.7.14 + # via + # kubernetes + # requests +charset-normalizer==3.4.2 + # via requests +durationpy==0.10 + # via kubernetes +google-auth==2.40.3 + # via 
kubernetes +idna==3.10 + # via requests +kubernetes==33.1.0 + # via -r requirements.txt +oauthlib==3.3.1 + # via + # kubernetes + # requests-oauthlib +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.2 + # via google-auth +python-dateutil==2.9.0.post0 + # via kubernetes +pyyaml==6.0.2 + # via kubernetes +requests==2.32.4 + # via + # kubernetes + # requests-oauthlib +requests-oauthlib==2.0.0 + # via kubernetes +rsa==4.9.1 + # via google-auth +six==1.17.0 + # via + # kubernetes + # python-dateutil +urllib3==2.5.0 + # via + # kubernetes + # requests +websocket-client==1.8.0 + # via kubernetes diff --git a/premerge/buildbot/requirements.txt b/premerge/buildbot/requirements.txt new file mode 100644 index 000000000..59942502d --- /dev/null +++ b/premerge/buildbot/requirements.txt @@ -0,0 +1 @@ +kubernetes==33.1.0 From e2940c0471cd1082f2e6f2975d7d6f8634c2b226 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 31 Jul 2025 08:56:06 -0700 Subject: [PATCH 022/135] [CI] Add container image for postsubmit testing of premerge config This patch adds a container image and some necessary setup code so we can run this through buildbot. 
Reviewers: Keenuts, cmtice, dschuff, gburgessiv, lnihlen Reviewed By: cmtice, Keenuts Pull Request: https://github.com/llvm/llvm-zorg/pull/527 --- premerge/buildbot/Dockerfile | 10 +++++++ premerge/buildbot/requirements.lock.txt | 40 ++++++++++++++++++++++++- premerge/buildbot/requirements.txt | 1 + premerge/buildbot/startup.sh | 20 +++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 premerge/buildbot/Dockerfile create mode 100644 premerge/buildbot/startup.sh diff --git a/premerge/buildbot/Dockerfile b/premerge/buildbot/Dockerfile new file mode 100644 index 000000000..96ee435cc --- /dev/null +++ b/premerge/buildbot/Dockerfile @@ -0,0 +1,10 @@ +FROM ubuntu:24.04 +RUN apt-get update && apt-get install -y python3 python3-pip +COPY requirements.lock.txt /requirements.lock.txt +RUN pip3 install --break-system-packages -r /requirements.lock.txt && rm /requirements.lock.txt +RUN mkdir /app +WORKDIR /app +COPY dispatch_job.py . +COPY startup.sh . +RUN chmod +x startup.sh +ENTRYPOINT /app/startup.sh diff --git a/premerge/buildbot/requirements.lock.txt b/premerge/buildbot/requirements.lock.txt index 6f65d1060..a49faeed5 100644 --- a/premerge/buildbot/requirements.lock.txt +++ b/premerge/buildbot/requirements.lock.txt @@ -4,22 +4,46 @@ # # pip-compile --output-file=requirements.lock.txt requirements.txt # +attrs==25.3.0 + # via twisted +autobahn==24.4.2 + # via buildbot-worker +automat==25.4.16 + # via twisted +buildbot-worker==3.11.7 + # via -r requirements.txt cachetools==5.5.2 # via google-auth certifi==2025.7.14 # via # kubernetes # requests +cffi==1.17.1 + # via cryptography charset-normalizer==3.4.2 # via requests +constantly==23.10.4 + # via twisted +cryptography==45.0.5 + # via autobahn durationpy==0.10 # via kubernetes google-auth==2.40.3 # via kubernetes +hyperlink==21.0.0 + # via + # autobahn + # twisted idna==3.10 - # via requests + # via + # hyperlink + # requests +incremental==24.7.2 + # via twisted kubernetes==33.1.0 # via -r 
requirements.txt +msgpack==1.1.1 + # via buildbot-worker oauthlib==3.3.1 # via # kubernetes @@ -30,6 +54,8 @@ pyasn1==0.6.1 # rsa pyasn1-modules==0.4.2 # via google-auth +pycparser==2.22 + # via cffi python-dateutil==2.9.0.post0 # via kubernetes pyyaml==6.0.2 @@ -44,11 +70,23 @@ rsa==4.9.1 # via google-auth six==1.17.0 # via + # buildbot-worker # kubernetes # python-dateutil +twisted==25.5.0 + # via buildbot-worker +txaio==25.6.1 + # via autobahn +typing-extensions==4.14.1 + # via twisted urllib3==2.5.0 # via # kubernetes # requests websocket-client==1.8.0 # via kubernetes +zope-interface==7.2 + # via twisted + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/premerge/buildbot/requirements.txt b/premerge/buildbot/requirements.txt index 59942502d..dfd1b8447 100644 --- a/premerge/buildbot/requirements.txt +++ b/premerge/buildbot/requirements.txt @@ -1 +1,2 @@ kubernetes==33.1.0 +buildbot-worker==3.11.7 diff --git a/premerge/buildbot/startup.sh b/premerge/buildbot/startup.sh new file mode 100644 index 000000000..7f39b6463 --- /dev/null +++ b/premerge/buildbot/startup.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# This script performs all the necessary setup and then starts the buildbot +# worker. + +mkdir /worker +buildbot-worker create-worker /worker \ + lab.llvm.org:9994 \ + $BUILDBOT_USERNAME \ + $BUILDBOT_PASSWORD + +echo "Google LLVM Premerge Infra Rotation " \ + > /worker/info/admin + +{ + echo "Premerge container (https://github.com/llvm/llvm-project/pkgs/container/ci-ubuntu-24.04)" + echo "GCP n2/n2d standard instances." 
+} > /worker/info/host + +buildbot-worker start /worker From af36012440ea365b424f3185e029342a001f8715 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 31 Jul 2025 08:59:07 -0700 Subject: [PATCH 023/135] [CI] Add workflow to build premerge-buildbot container This patch sets up a workflow in a similar vein to all the other container images for building the premerge-buildbot container and publishing it to GHCR. There is a lot of duplicate code and eventually this should be refactored into an action. This is on my TODO list. Reviewers: cmtice Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/528 --- .../build-premerge-buildbot-container.yml | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 .github/workflows/build-premerge-buildbot-container.yml diff --git a/.github/workflows/build-premerge-buildbot-container.yml b/.github/workflows/build-premerge-buildbot-container.yml new file mode 100644 index 000000000..cfb2a0f49 --- /dev/null +++ b/.github/workflows/build-premerge-buildbot-container.yml @@ -0,0 +1,76 @@ +name: Build Premerge Buildbot Container + +permissions: + contents: read + +on: + push: + branches: + - main + paths: + - .github/workflows/build-premerge-buildbot-container.yml + - 'premerge/buildbot/**' + pull_request: + paths: + - .github/workflows/build-premerge-buildbot-container.yml + - 'premerge/buildbot/**' + +jobs: + build-premerge-buildbot-container: + if: github.repository_owner == 'llvm' + runs-on: ubuntu-24.04 + outputs: + container-name: ${{ steps.vars.outputs.container-name }} + container-name-tag: ${{ steps.vars.outputs.container-name-tag }} + container-filename: ${{ steps.vars.outputs.container-filename }} + steps: + - name: Checkout LLVM Zorg + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: premerge/buildbot + - name: Write Variables + id: vars + run: | + tag=`date +%s` + container_name="ghcr.io/$GITHUB_REPOSITORY_OWNER/premerge-buildbot" 
+ echo "container-name=$container_name" >> $GITHUB_OUTPUT + echo "container-name-tag=$container_name:$tag" >> $GITHUB_OUTPUT + echo "container-filename=$(echo $container_name:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT + - name: Build Container + working-directory: ./premerge/buildbot + run: | + podman build -t ${{ steps.vars.outputs.container-name-tag }} -f Dockerfile . + # Save the container so we have it in case the push fails. This also + # allows us to separate the push step into a different job so we can + # maintain minimal permissions while building the container. + - name: Save Container Image + run: | + podman save ${{ steps.vars.outputs.container-name-tag }} > ${{ steps.vars.outputs.container-filename }} + - name: Upload Container Image + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: container + path: ${{ steps.vars.outputs.container-filename }} + retention-days: 14 + + push-premerge-buildbot-container: + if: github.event_name == 'push' + needs: + - build-premerge-buildbot-container + permissions: + packages: write + runs-on: ubuntu-24.04 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Download Container Image + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: container + - name: Push Container + run: | + podman load -i ${{ needs.build-premerge-buildbot-container.outputs.container-filename }} + podman tag ${{ needs.build-premerge-buildbot-container.outputs.container-name-tag }} ${{ needs.build-premerge-buildbot-container.outputs.container-name }}:latest + podman login -u ${{ github.actor }} -p $GITHUB_TOKEN ghcr.io + podman push ${{ needs.build-premerge-buildbot-container.outputs.container-name-tag }} + podman push ${{ needs.build-premerge-buildbot-container.outputs.container-name }}:latest From 692cc59c4a354bb588b4d2357d908d440c5b910a Mon Sep 17 00:00:00 2001 From: Kewen12 Date: Thu, 31 Jul 2025 09:02:49 -0700 
Subject: [PATCH 024/135] [Bot][HIP] Add prune to git fetch step for hip bot (#531) We saw that this bot was broken due to branch name conflict when running git fetch. Adding --prune to prevent such issue. --- zorg/buildbot/builders/annotated/hip-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zorg/buildbot/builders/annotated/hip-build.sh b/zorg/buildbot/builders/annotated/hip-build.sh index c57ca4b4d..e699fa18c 100755 --- a/zorg/buildbot/builders/annotated/hip-build.sh +++ b/zorg/buildbot/builders/annotated/hip-build.sh @@ -52,7 +52,7 @@ if [ ! -d "${LLVM_ROOT}" ]; then fi build_step "Updating llvm-project repo" -git -C "${LLVM_ROOT}" fetch origin +git -C "${LLVM_ROOT}" fetch --prune origin git -C "${LLVM_ROOT}" reset --hard "${LLVM_REVISION}" } @@ -64,7 +64,7 @@ if [ ! -d "${TESTSUITE_ROOT}" ]; then fi build_step "Updating llvm-test-suite repo" -git -C "${TESTSUITE_ROOT}" fetch origin +git -C "${TESTSUITE_ROOT}" fetch --prune origin git -C "${TESTSUITE_ROOT}" reset --hard origin/main } From e2500a901f0c6701e54dc1375e0e5d79702ab712 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 31 Jul 2025 09:05:35 -0700 Subject: [PATCH 025/135] [CI] Remove old premerge bots This patch removes the old LLVM premerge bots from llvm-zorg in preparation for adding in the new ones. 
Reviewers: cmtice, dschuff, gkistanova, gburgessiv, Keenuts, lnihlen Reviewed By: cmtice, Keenuts Pull Request: https://github.com/llvm/llvm-zorg/pull/529 --- buildbot/osuosl/master/config/builders.py | 44 ----------------------- buildbot/osuosl/master/config/status.py | 10 ------ buildbot/osuosl/master/config/workers.py | 7 ---- 3 files changed, 61 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index d44dd4857..f39fb8324 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3536,50 +3536,6 @@ script_interpreter=None, clean=True)}, - # Builders similar to used in Buildkite premerge pipeline. - # Please keep in sync with llvm-project/.ci configurations. - - # See https://github.com/llvm/llvm-project/blob/main/.ci/monolithic-windows.sh. - {'name' : "premerge-monolithic-windows", - 'tags' : ["premerge"], - 'workernames' : ["premerge-windows-1"], - 'builddir': "premerge-monolithic-windows", - 'factory' : UnifiedTreeBuilder.getCmakeWithNinjaWithMSVCBuildFactory( - vs="autodetect", - depends_on_projects=["clang-tools-extra", "clang", "libclc", "lld", "llvm", "mlir", "polly"], - checks=["check-all"], - install_pip_requirements = True, - clean = True, - extra_configure_args=[ - "-DCMAKE_BUILD_TYPE=Release", - "-DLLVM_ENABLE_ASSERTIONS=ON", - "-DLLVM_BUILD_EXAMPLES=ON", - "-DCOMPILER_RT_BUILD_LIBFUZZER=OFF", - "-DLLVM_LIT_ARGS=-v", - "-DMLIR_ENABLE_BINDINGS_PYTHON=ON", - "-DCOMPILER_RT_BUILD_ORC=OFF", - "-DCMAKE_C_COMPILER_LAUNCHER=sccache", - "-DCMAKE_CXX_COMPILER_LAUNCHER=sccache"])}, - # See https://github.com/llvm/llvm-project/blob/main/.ci/monolithic-linux.sh. 
- {'name': "premerge-monolithic-linux", - 'tags' : ["premerge"], - 'collapseRequests': False, - 'workernames': ["premerge-linux-1"], - 'builddir': "premerge-monolithic-linux", - 'factory': UnifiedTreeBuilder.getCmakeWithNinjaBuildFactory( - depends_on_projects=["bolt", "clang", "clang-tools-extra", "compiler-rt", "flang", "flang-rt", "libc", "libclc", "lld", "llvm", "mlir", "polly"], - install_pip_requirements = True, - extra_configure_args=[ - "-DCMAKE_BUILD_TYPE=Release", - "-DLLVM_ENABLE_ASSERTIONS=ON", - "-DLLVM_BUILD_EXAMPLES=ON", - "-DCOMPILER_RT_BUILD_LIBFUZZER=OFF", - "-DMLIR_ENABLE_BINDINGS_PYTHON=ON", - "-DLLVM_LIT_ARGS=-v", - "-DLLVM_ENABLE_LLD=ON", - "-DCMAKE_CXX_FLAGS=-gmlt", - "-DLLVM_CCACHE_BUILD=ON"])}, - ] # LLDB remote-linux builder env variables. diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index 4f1a6ef13..30a46a037 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -550,16 +550,6 @@ def getReporters(): utils.LLVMDefaultBuildStatusGenerator( builders = ["clang-cmake-x86_64-avx512-linux"]) ]), - reporters.MailNotifier( - fromaddr = status_email_fromaddr, - sendToInterestedUsers = False, - extraRecipients = ["llvm-premerge-buildbots@google.com", "joker.eph@gmail.com"], - generators = [ - utils.LLVMDefaultBuildStatusGenerator( - builders = [ - "premerge-monolithic-windows", - "premerge-monolithic-linux"]) - ]), reporters.MailNotifier( fromaddr = status_email_fromaddr, sendToInterestedUsers = False, diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 01ede0558..20f419e48 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -406,11 +406,4 @@ def get_all(): # FIXME: A placeholder for annoying worker which nobody could stop. # adding it avoid logs spammed by failed authentication for that worker. 
create_worker("mlir-ubuntu-worker0"), - - # Linux builder matching Buildkite pre-merge checks configuration. - create_worker("premerge-linux-1", max_builds=1, missing_timeout=300, - notify_on_missing="llvm-premerge-buildbots@google.com"), - # Windows builder matching Buildkite pre-merge checks configuration. - create_worker("premerge-windows-1", max_builds=1, missing_timeout=300, - notify_on_missing="llvm-premerge-buildbots@google.com"), ] From 2ea7609f68e39ee6818fed81e11a76095aaa02fa Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Fri, 1 Aug 2025 15:32:37 -0400 Subject: [PATCH 026/135] [CI] Export scraped commit data to a BigQuery dataset (#532) Currently, the data we scrape and process regarding LLVM commits isn't persistent and cannot be referenced outside of each CronJob invocation. This change uploads scraped and parsed LLVM commit data to a new BigQuery dataset, so that we may access and reuse this data without having to requery and reparse the same commits to llvm-project. --- .../ops-container/process_llvm_commits.py | 70 ++++++++++++++++--- .../llvm_commits_table_schema.json | 56 +++++++++++++++ premerge/main.tf | 27 +++++++ 3 files changed, 145 insertions(+), 8 deletions(-) create mode 100644 premerge/bigquery_schema/llvm_commits_table_schema.json diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/llvm-ops-metrics/ops-container/process_llvm_commits.py index 81834d63f..fbf6a1c65 100644 --- a/llvm-ops-metrics/ops-container/process_llvm_commits.py +++ b/llvm-ops-metrics/ops-container/process_llvm_commits.py @@ -3,6 +3,7 @@ import logging import os import git +from google.cloud import bigquery import requests GRAFANA_URL = ( @@ -11,6 +12,10 @@ GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql" REPOSITORY_URL = "https://github.com/llvm/llvm-project.git" +# BigQuery dataset and tables to write metrics to. 
+OPERATIONAL_METRICS_DATASET = "operational_metrics" +LLVM_COMMITS_TABLE = "llvm_commits" + # How many commits to query the GitHub GraphQL API for at a time. # Querying too many commits at once often leads to the call failing. GITHUB_API_BATCH_SIZE = 50 @@ -27,11 +32,23 @@ commit_{commit_sha}: object(oid:"{commit_sha}") {{ ... on Commit {{ + author {{ + user {{ + login + }} + }} associatedPullRequests(first: 1) {{ totalCount pullRequest: nodes {{ number reviewDecision + reviews(first: 10) {{ + nodes {{ + reviewer: author {{ + login + }} + }} + }} }} }} }} @@ -42,12 +59,14 @@ @dataclasses.dataclass class LLVMCommitInfo: commit_sha: str - commit_datetime: datetime.datetime commit_timestamp_seconds: int + files_modified: set[str] + commit_author: str = "" # GitHub username of author is unknown until API call has_pull_request: bool = False - pr_number: int = 0 + pull_request_number: int = 0 is_reviewed: bool = False is_approved: bool = False + reviewers: set[str] = dataclasses.field(default_factory=set) def scrape_new_commits_by_date( @@ -99,7 +118,9 @@ def query_for_reviews( # Create a map of commit sha to info new_commits = { commit.hexsha: LLVMCommitInfo( - commit.hexsha, commit.committed_datetime, commit.committed_date + commit_sha=commit.hexsha, + commit_timestamp_seconds=commit.committed_date, + files_modified=set(commit.stats.files.keys()), ) for commit in new_commits } @@ -140,29 +161,41 @@ def query_for_reviews( }, json={"query": query}, ) + + # Exit if API call fails + # A failed API call means a large batch of data is missing and will not be + # reflected in the dashboard. The dashboard will silently misrepresent + # commit data if we continue execution, so it's better to fail loudly. 
if response.status_code < 200 or response.status_code >= 300: logging.error("Failed to query GitHub GraphQL API: %s", response.text) + exit(1) + api_commit_data.update(response.json()["data"]["repository"]) + # Amend commit information with GitHub data for commit_sha, data in api_commit_data.items(): - # Verify that push commit has no pull requests commit_sha = commit_sha.removeprefix("commit_") + commit_info = new_commits[commit_sha] + commit_info.commit_author = data["author"]["user"]["login"] # If commit has no pull requests, skip it. No data to update. if data["associatedPullRequests"]["totalCount"] == 0: continue pull_request = data["associatedPullRequests"]["pullRequest"][0] - commit_info = new_commits[commit_sha] commit_info.has_pull_request = True - commit_info.pr_number = pull_request["number"] + commit_info.pull_request_number = pull_request["number"] commit_info.is_reviewed = pull_request["reviewDecision"] is not None commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED" + commit_info.reviewers = set([ + review["reviewer"]["login"] + for review in pull_request["reviews"]["nodes"] + ]) return list(new_commits.values()) -def upload_daily_metrics( +def upload_daily_metrics_to_grafana( grafana_api_key: str, grafana_metrics_userid: str, new_commits: list[LLVMCommitInfo], @@ -205,6 +238,22 @@ def upload_daily_metrics( logging.error("Failed to submit data to Grafana: %s", response.text) +def upload_daily_metrics_to_bigquery(new_commits: list[LLVMCommitInfo]) -> None: + """Upload processed commit metrics to a BigQuery dataset. + + Args: + new_commits: List of commits to process & upload to BigQuery. 
+ """ + bq_client = bigquery.Client() + table_ref = bq_client.dataset(OPERATIONAL_METRICS_DATASET).table( + LLVM_COMMITS_TABLE + ) + table = bq_client.get_table(table_ref) + commit_records = [dataclasses.asdict(commit) for commit in new_commits] + bq_client.insert_rows(table, commit_records) + bq_client.close() + + def main() -> None: github_token = os.environ["GITHUB_TOKEN"] grafana_api_key = os.environ["GRAFANA_API_KEY"] @@ -227,7 +276,12 @@ def main() -> None: new_commit_info = query_for_reviews(new_commits, github_token) logging.info("Uploading metrics to Grafana.") - upload_daily_metrics(grafana_api_key, grafana_metrics_userid, new_commit_info) + upload_daily_metrics_to_grafana( + grafana_api_key, grafana_metrics_userid, new_commit_info + ) + + logging.info("Uploading metrics to BigQuery.") + upload_daily_metrics_to_bigquery(new_commit_info) if __name__ == "__main__": diff --git a/premerge/bigquery_schema/llvm_commits_table_schema.json b/premerge/bigquery_schema/llvm_commits_table_schema.json new file mode 100644 index 000000000..8d86c77e6 --- /dev/null +++ b/premerge/bigquery_schema/llvm_commits_table_schema.json @@ -0,0 +1,56 @@ +[ + { + "name": "commit_sha", + "type": "STRING", + "mode": "NULLABLE", + "description": "Commit hexsha of a commit made to llvm/llvm-project:main" + }, + { + "name": "commit_author", + "type": "STRING", + "mode": "NULLABLE", + "description": "GitHub username of the commit author" + }, + { + "name": "commit_timestamp_seconds", + "type": "INTEGER", + "mode": "NULLABLE", + "description": "Time this commit was made at, as a Unix timestamp" + }, + { + "name": "has_pull_request", + "type": "BOOLEAN", + "mode": "NULLABLE", + "description": "Whether or not this commit has an associated pull request" + }, + { + "name": "pull_request_number", + "type": "INTEGER", + "mode": "NULLABLE", + "description": "Number of the pull request associated with this commit" + }, + { + "name": "is_reviewed", + "type": "BOOLEAN", + "mode": "NULLABLE", + 
"description": "Whether or not the pull request for this commit was reviewed" + }, + { + "name": "is_approved", + "type": "BOOLEAN", + "mode": "NULLABLE", + "description": "Whether or not the pull request for this commit was approved" + }, + { + "name": "reviewers", + "type": "STRING", + "mode": "REPEATED", + "description": "List of GitHub users who reviewed the pull request for this commit" + }, + { + "name": "files_modified", + "type": "STRING", + "mode": "REPEATED", + "description": "List of filepaths modified by this commit" + } +] diff --git a/premerge/main.tf b/premerge/main.tf index ec16e267f..c30693680 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -293,3 +293,30 @@ resource "kubernetes_manifest" "operational_metrics_cronjob" { kubernetes_service_account.operational_metrics_ksa, ] } + +# BigQuery dataset and table resources +resource "google_bigquery_dataset" "operational_metrics_dataset" { + dataset_id = "operational_metrics" + description = "Dataset for retaining operational data regarding LLVM commit trends." +} + +resource "google_bigquery_table" "llvm_commits_table" { + dataset_id = google_bigquery_dataset.operational_metrics_dataset.dataset_id + table_id = "llvm_commits" + description = "LLVM commit data, including pull request and review activity per commit." 
+ + schema = file("./bigquery_schema/llvm_commits_table_schema.json") + + depends_on = [google_bigquery_dataset.operational_metrics_dataset] +} + +resource "google_bigquery_dataset_iam_binding" "operational_metrics_dataset_editor_binding" { + dataset_id = google_bigquery_dataset.operational_metrics_dataset.dataset_id + role = "roles/bigquery.dataEditor" + + members = [ + "serviceAccount:${google_service_account.operational_metrics_gsa.email}", + ] + + depends_on = [google_bigquery_dataset.operational_metrics_dataset, google_service_account.operational_metrics_gsa] +} From b4ec410b8e8133c221779a3019cade66eb584e0f Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Mon, 4 Aug 2025 16:48:54 -0400 Subject: [PATCH 027/135] [CI] Track unique contributors to LLVM over time (#534) This change uploads additional metrics to Grafana so that we may visualize LLVM contributor data. From newly scraped commits and stored data from BigQuery, we can determine the number of unique LLVM contributors per day, the number of all time unique LLVM contributors, and the number of new contributors to LLVM per day. --- .../ops-container/process_llvm_commits.py | 72 +++++++++++++++---- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/llvm-ops-metrics/ops-container/process_llvm_commits.py index fbf6a1c65..7ee7d788a 100644 --- a/llvm-ops-metrics/ops-container/process_llvm_commits.py +++ b/llvm-ops-metrics/ops-container/process_llvm_commits.py @@ -195,10 +195,29 @@ def query_for_reviews( return list(new_commits.values()) +def get_past_contributors(bq_client: bigquery.Client) -> set[str]: + """Get past contributors to LLVM from BigQuery dataset. + + Args: + bq_client: The BigQuery client to use. + + Returns: + Set of unique past contributors to LLVM. 
+ """ + results = bq_client.query(""" + SELECT + DISTINCT commit_author + FROM %s.%s + WHERE commit_author IS NOT NULL + """ % (OPERATIONAL_METRICS_DATASET, LLVM_COMMITS_TABLE)).result() + return set(row.commit_author for row in results) + + def upload_daily_metrics_to_grafana( grafana_api_key: str, grafana_metrics_userid: str, new_commits: list[LLVMCommitInfo], + past_contributors: set[str], ) -> None: """Upload daily commit metrics to Grafana. @@ -206,12 +225,26 @@ def upload_daily_metrics_to_grafana( grafana_api_key: The key to make API requests with. grafana_metrics_userid: The user to make API requests with. new_commits: List of commits to process & upload to Grafana. + past_contributors: Set of unique past contributors to LLVM. """ + + def post_data(data: str) -> None: + """Helper function to post data to Grafana.""" + response = requests.post( + GRAFANA_URL, + headers={"Content-Type": "text/plain"}, + data=data, + auth=(grafana_metrics_userid, grafana_api_key), + ) + if response.status_code < 200 or response.status_code >= 300: + logging.error("Failed to submit data to Grafana: %s", response.text) + # Count each type of commit made approval_count = 0 review_count = 0 pull_request_count = 0 push_count = 0 + contributors = set() for commit in new_commits: if commit.is_approved: approval_count += 1 @@ -221,37 +254,40 @@ def upload_daily_metrics_to_grafana( pull_request_count += 1 else: push_count += 1 + contributors.add(commit.commit_author) # Post data via InfluxDB API call + # Commit data request_data = ( "llvm_project_main_daily_commits" " approval_count={},review_count={},pull_request_count={},push_count={}" ).format(approval_count, review_count, pull_request_count, push_count) - response = requests.post( - GRAFANA_URL, # Set timestamp precision to seconds - headers={"Content-Type": "text/plain"}, - data=request_data, - auth=(grafana_metrics_userid, grafana_api_key), - ) + post_data(request_data) - if response.status_code < 200 or response.status_code >= 
300: - logging.error("Failed to submit data to Grafana: %s", response.text) + # Contributor data + request_data = ( + "llvm_project_main" + " daily_unique_contributor_count={},all_time_unique_contributor_count={}" + .format(len(contributors), len(contributors | past_contributors)) + ) + post_data(request_data) -def upload_daily_metrics_to_bigquery(new_commits: list[LLVMCommitInfo]) -> None: +def upload_daily_metrics_to_bigquery( + bq_client: bigquery.Client, new_commits: list[LLVMCommitInfo] +) -> None: """Upload processed commit metrics to a BigQuery dataset. Args: + bq_client: The BigQuery client to use. new_commits: List of commits to process & upload to BigQuery. """ - bq_client = bigquery.Client() table_ref = bq_client.dataset(OPERATIONAL_METRICS_DATASET).table( LLVM_COMMITS_TABLE ) table = bq_client.get_table(table_ref) commit_records = [dataclasses.asdict(commit) for commit in new_commits] bq_client.insert_rows(table, commit_records) - bq_client.close() def main() -> None: @@ -259,6 +295,8 @@ def main() -> None: grafana_api_key = os.environ["GRAFANA_API_KEY"] grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"] + bq_client = bigquery.Client() + # Scrape new commits date_to_scrape = datetime.datetime.now( datetime.timezone.utc @@ -275,13 +313,21 @@ def main() -> None: logging.info("Querying for reviews of new commits.") new_commit_info = query_for_reviews(new_commits, github_token) + logging.info("Getting set of past LLVM contributors.") + past_contributors = get_past_contributors(bq_client) + logging.info("Uploading metrics to Grafana.") upload_daily_metrics_to_grafana( - grafana_api_key, grafana_metrics_userid, new_commit_info + grafana_api_key, + grafana_metrics_userid, + new_commit_info, + past_contributors, ) logging.info("Uploading metrics to BigQuery.") - upload_daily_metrics_to_bigquery(new_commit_info) + upload_daily_metrics_to_bigquery(bq_client, new_commit_info) + + bq_client.close() if __name__ == "__main__": From 
1b02c950e13d0bce666633a25ca8a1e578fa095b Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Tue, 5 Aug 2025 10:55:05 -0400 Subject: [PATCH 028/135] [CI] Add BigQuery JobUser role binding for querying operational metrics (#535) This change reintroduces a BigQuery role binding that was removed in #525. Now that our CronJob is also querying past data to determine the number of unique LLVM contributors over time, we must grant the associated service account `roles/bigquery.JobUser` so that the BigQuery client can create query jobs. This is the error without this binding: ``` google.api_core.exceptions.Forbidden: 403 POST: Access Denied: User does not have bigquery.jobs.create permission in project llvm-premerge-checks. ``` --- premerge/main.tf | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/premerge/main.tf b/premerge/main.tf index c30693680..5e3798785 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -231,6 +231,17 @@ resource "google_service_account" "operational_metrics_gsa" { display_name = "Operational Metrics GSA" } +resource "google_project_iam_binding" "bigquery_jobuser_binding" { + project = google_service_account.operational_metrics_gsa.project + role = "roles/bigquery.jobUser" + + members = [ + "serviceAccount:${google_service_account.operational_metrics_gsa.email}", + ] + + depends_on = [google_service_account.operational_metrics_gsa] +} + resource "kubernetes_namespace" "operational_metrics" { metadata { name = "operational-metrics" From 64e2156368582bdc1c09046af1f4e8abc1b9f059 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 5 Aug 2025 22:10:45 +0000 Subject: [PATCH 029/135] [CI] Remove some settings to keep terraform happy Some of these seem to have gotten fixed after upgrading to a more recent GCP provider version. There is still one left on the windows node pool that needs more investigation. 
--- premerge/gke_cluster/main.tf | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 846d8b92e..0b5772ee7 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -44,13 +44,6 @@ resource "google_container_node_pool" "llvm_premerge_linux_service" { workload_metadata_config { mode = "GKE_METADATA" } - # Terraform wants to recreate the node pool everytime whe running - # terraform apply unless we explicitly set this. - # TODO(boomanaiden154): Look into why terraform is doing this so we do - # not need this hack. - resource_labels = { - "goog-gke-node-pool-provisioning-model" = "on-demand" - } } } @@ -76,13 +69,6 @@ resource "google_container_node_pool" "llvm_premerge_linux" { "premerge-platform" : "linux" } disk_size_gb = 200 - # Terraform wants to recreate the node pool everytime whe running - # terraform apply unless we explicitly set this. - # TODO(boomanaiden154): Look into why terraform is doing this so we do - # not need this hack. - resource_labels = { - "goog-gke-node-pool-provisioning-model" = "on-demand" - } # Enable workload identity federation for this pool so that we can access # GCS buckets. @@ -114,13 +100,6 @@ resource "google_container_node_pool" "llvm_premerge_libcxx" { "premerge-platform-libcxx" : "linux-libcxx" } disk_size_gb = 200 - # Terraform wants to recreate the node pool everytime whe running - # terraform apply unless we explicitly set this. - # TODO(boomanaiden154): Look into why terraform is doing this so we do - # not need this hack. - resource_labels = { - "goog-gke-node-pool-provisioning-model" = "on-demand" - } } } @@ -159,13 +138,6 @@ resource "google_container_node_pool" "llvm_premerge_windows_2022" { } disk_size_gb = 200 disk_type = "pd-ssd" - # Terraform wants to recreate the node pool everytime whe running - # terraform apply unless we explicitly set this. 
- # TODO(boomanaiden154): Look into why terraform is doing this so we do - # not need this hack. - resource_labels = { - "goog-gke-node-pool-provisioning-model" = "on-demand" - } # Enable workload identity federation for this pool so that we can access # GCS buckets. From 2c7e81e300cd7b312e8102bfce8e702abbd94de4 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 5 Aug 2025 22:29:18 +0000 Subject: [PATCH 030/135] [CI] Update maintenance window comment I updated the maintenance window so running terraform apply would actually work given the restrictions put in place by GKE. I forgot to update the comment though, so it ended up out of sync. --- premerge/gke_cluster/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 0b5772ee7..958b31302 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -20,7 +20,7 @@ resource "google_container_cluster" "llvm_premerge" { workload_pool = "llvm-premerge-checks.svc.id.goog" } - # We prefer that maintenance is done on weekends between 02:00 and 05:00 + # We prefer that maintenance is done on weekends between 02:00 and 08:00 # UTC when commit traffic is low to avoid interruptions. 
maintenance_policy { recurring_window { From a7b388117238ade558ffe1062f3b96a505f4b0af Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 6 Aug 2025 15:01:25 -0700 Subject: [PATCH 031/135] [sanitizer] Don't stop processing if time.txt is missing --- zorg/buildbot/builders/sanitizers/buildbot_functions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/sanitizers/buildbot_functions.sh b/zorg/buildbot/builders/sanitizers/buildbot_functions.sh index 11d9b09a5..0d3376282 100755 --- a/zorg/buildbot/builders/sanitizers/buildbot_functions.sh +++ b/zorg/buildbot/builders/sanitizers/buildbot_functions.sh @@ -630,5 +630,5 @@ function upload_stats() { -H Metadata-Flavor:Google > "${ROOT}/machine-type.txt" || true gsutil cp "${ROOT}/"{time,cpu,machine-type}".txt" "gs://sanitizer-buildbot-out/${BUILDBOT_BUILDERNAME}/${1}/${BUILDBOT_REVISION}/" || true fi - cat "${ROOT}/time.txt" + [[ ! -f "${ROOT}/time.txt" ]] || cat "${ROOT}/time.txt" } From 488824022ea0a4e1a4fe6296eb650f51ed5cd517 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 6 Aug 2025 15:04:04 -0700 Subject: [PATCH 032/135] [CI] Convert dispatch_jobs.py into an annotated builder This script should be used directly as an annotated builder and this script sets it up to do so. There are two main changes: 1. We print out a build step command. The monolithic scripts will eventually need to be upgraded to print out more fine grained build step information. 2. We take in a BUILDBOT_REVISION rather than the SHA on the command line. Passing the commit SHA along on the command line is complicated while BUILDBOT_REVISION is set by default for all annotated builders. 
Reviewers: dschuff, Keenuts, cmtice, lnihlen, gburgessiv Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/536 --- premerge/buildbot/dispatch_job.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/premerge/buildbot/dispatch_job.py b/premerge/buildbot/dispatch_job.py index 31fd4b152..b0b69a374 100644 --- a/premerge/buildbot/dispatch_job.py +++ b/premerge/buildbot/dispatch_job.py @@ -13,6 +13,7 @@ import dateutil import datetime import json +import os import kubernetes @@ -154,6 +155,7 @@ def main(commit_sha: str, platform: str): namespace = PLATFORM_TO_NAMESPACE[platform] latest_time = datetime.datetime.min v1_api = kubernetes.client.CoreV1Api() + print("@@@BUILD_STEP Build/Test@@@") while True: try: pod_finished, latest_time = print_logs( @@ -173,7 +175,10 @@ def main(commit_sha: str, platform: str): if __name__ == "__main__": - if len(sys.argv) != 3: - logging.fatal("Expected usage is dispatch_job.py {commit SHA} {platform}") + if len(sys.argv) != 2: + logging.fatal("Expected usage is dispatch_job.py {platform}") sys.exit(1) - main(sys.argv[1], sys.argv[2]) + if "BUILDBOT_REVISION" not in os.environ: + logging.fatal("Expected to have BUILDBOT_REVISION environment variable set.") + sys.exit(1) + main(sys.argv[1], os.environ["BUILDBOT_REVISION"]) From ed3804904344597d39eb0fd915caba2c67156b98 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 6 Aug 2025 15:05:09 -0700 Subject: [PATCH 033/135] [CI] Move premerge buildbot script to annotated builders This is where all of the other annotated builder scripts live, so move it over there. 
Reviewers: Keenuts, cmtice, lnihlen, dschuff, gburgessiv Pull Request: https://github.com/llvm/llvm-zorg/pull/537 --- premerge/buildbot/Dockerfile | 1 - .../buildbot/builders/annotated/premerge}/dispatch_job.py | 0 .../buildbot/builders/annotated/premerge}/dispatch_job_test.py | 0 3 files changed, 1 deletion(-) rename {premerge/buildbot => zorg/buildbot/builders/annotated/premerge}/dispatch_job.py (100%) rename {premerge/buildbot => zorg/buildbot/builders/annotated/premerge}/dispatch_job_test.py (100%) diff --git a/premerge/buildbot/Dockerfile b/premerge/buildbot/Dockerfile index 96ee435cc..103df931c 100644 --- a/premerge/buildbot/Dockerfile +++ b/premerge/buildbot/Dockerfile @@ -4,7 +4,6 @@ COPY requirements.lock.txt /requirements.lock.txt RUN pip3 install --break-system-packages -r /requirements.lock.txt && rm /requirements.lock.txt RUN mkdir /app WORKDIR /app -COPY dispatch_job.py . COPY startup.sh . RUN chmod +x startup.sh ENTRYPOINT /app/startup.sh diff --git a/premerge/buildbot/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py similarity index 100% rename from premerge/buildbot/dispatch_job.py rename to zorg/buildbot/builders/annotated/premerge/dispatch_job.py diff --git a/premerge/buildbot/dispatch_job_test.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job_test.py similarity index 100% rename from premerge/buildbot/dispatch_job_test.py rename to zorg/buildbot/builders/annotated/premerge/dispatch_job_test.py From c06d4460fd4006012fa940979d937438c06d86ea Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 6 Aug 2025 16:47:46 -0700 Subject: [PATCH 034/135] [CI] Setup Buildbot Infra for Premerge Builders This patch sets up the buildbot infrastructure for the premerge builders. 
Reviewers: dschuff, Keenuts, lnihlen, gburgessiv, cmtice, gkistanova Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/538 --- buildbot/osuosl/master/config/builders.py | 24 ++++++++++++++++++++++- buildbot/osuosl/master/config/workers.py | 10 ++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index f39fb8324..093bc95ac 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3535,7 +3535,29 @@ checkout_llvm_sources=False, script_interpreter=None, clean=True)}, - + + # Builders that test the premerge configuration + # These builders are specifically for running the premerge configuration + # postcommit (after changes have landed in main). The configuration for + # running these checks premerge exists in the monorepo inside the + # .github/workflows/premerge.yaml file. + {'name': "premerge-monolithic-linux", + 'workernames': ["premerge-us-central-linux", "premerge-us-west-linux"], + 'builddir': "premerge-monolithic-linux", + 'factory': AnnotatedBuilder.getAnnotatedBuildFactory( + script="premerge/dispatch_job.py", + checkout_llvm_sources=False, + extra_args=["Linux"], + depends_on_projects=["bolt", "clang", "clang-tools-extra", "compiler-rt", "flang", "flang-rt", "libc", "libclc", "lld", "lldb", "llvm", "mlir", "polly"])}, + + {'name': "premerge-monolithic-windows", + 'workernames': ["premerge-us-central-windows", "premerge-us-west-windows"], + 'builddir': "premerge-monolithic-windows", + 'factory': AnnotatedBuilder.getAnnotatedBuildFactory( + script="premerge/dispatch_job.py", + checkout_llvm_sources=False, + extra_args=["Windows"], + depends_on_projects=["clang-tools-extra", "clang", "libclc", "lld", "llvm", "mlir", "polly"])}, ] # LLDB remote-linux builder env variables. 
diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 20f419e48..945a07f8f 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -403,6 +403,16 @@ def get_all(): create_worker("rise-worker-3", properties={'jobs' : 32}, max_builds=1), create_worker("rise-worker-4", properties={'jobs' : 32}, max_builds=1), + # Builders that run the premerge configuration + # These workers are specifically for running the premerge configuration + # postcommit (after changes have landed in main). The workers for the + # infrastructure that runs the checks premerge are setup through Github + # Actions under the premerge/ folder in llvm-zorg. + create_worker("premerge-us-central-linux", properties={'jobs': 64}, max_builds=3), + create_worker("premerge-us-central-windows", properties={'jobs': 64}, max_builds=3), + create_worker("premerge-us-west-linux", properties={'jobs': 64}, max_builds=3), + create_worker("premerge-us-west-windows", properties={'jobs': 64}, max_builds=3), + # FIXME: A placeholder for annoying worker which nobody could stop. # adding it avoid logs spammed by failed authentication for that worker. create_worker("mlir-ubuntu-worker0"), From 0b250a4a4e130b7b02d5019feaf5a92e85862314 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 6 Aug 2025 16:50:04 -0700 Subject: [PATCH 035/135] [CI] Add support for windows to dispatch_job.py This patch adds in windows functionality to dispatch_job.py. Some minor refactoring was done around the start_build function so that the core functionality can be reused between Linux and Windows. 
Reviewers: cmtice, gburgessiv, lnihlen, dschuff, Keenuts Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/540 --- .../annotated/premerge/dispatch_job.py | 72 ++++++++++++++----- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index b0b69a374..63df72f21 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -17,50 +17,82 @@ import kubernetes -PLATFORM_TO_NAMESPACE = {"Linux": "llvm-premerge-linux-buildbot"} +PLATFORM_TO_NAMESPACE = { + "Linux": "llvm-premerge-linux-buildbot", + "Windows": "llvm-premerge-windows-buildbot", +} LOG_SECONDS_TO_QUERY = 10 SECONDS_QUERY_LOGS_EVERY = 5 -def start_build_linux(commit_sha: str, k8s_client) -> str: - """Spawns a pod to build/test LLVM at the specified SHA. +def start_build(k8s_client, pod_name: str, namespace: str, commands: list[str]) -> None: + """Spawns a pod to run the specified commands. Args: - commit_sha: The commit SHA to build/run the tests at. k8s_client: The kubernetes client instance to use for spawning the pod. - - Returns: - A string containing the name of the pod. + pod_name: The name of the pod to start. + namespace: The namespace to launch the pod in. + commands: The commands to run upon pod start. 
""" - pod_name = f"build-{commit_sha}" - commands = [ - "git clone --depth 100 https://github.com/llvm/llvm-project", - "cd llvm-project", - f"git checkout ${commit_sha}", - "export CC=clang", - "export CXX=clang++", - './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' - "echo BUILD FINISHED", - ] pod_definition = { "apiVersion": "v1", "kind": "Pod", "metadata": { "name": pod_name, - "namespace": PLATFORM_TO_NAMESPACE["Linux"], + "namespace": namespace, }, "spec": { "containers": [ { "name": "build", "image": "ghcr.io/llvm/ci-ubuntu-24.04", - "command": ["/bin/bash", "-c", ";".join(commands)], + "commands": commands, } ], "restartPolicy": "Never", }, } kubernetes.utils.create_from_dict(k8s_client, pod_definition) + + +def start_build_linux(commit_sha: str, k8s_client) -> str: + """Starts a pod to build/test on Linux at the specified SHA.""" + pod_name = f"build-{commit_sha}" + commands = [ + "git clone --depth 100 https://github.com/llvm/llvm-project", + "cd llvm-project", + f"git checkout ${commit_sha}", + "export CC=clang", + "export CXX=clang++", + './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' + "echo BUILD FINISHED", + ] + start_build( + k8s_client, + pod_name, + PLATFORM_TO_NAMESPACE["Linux"], + ["/bin/bash", "-c", ";".join(commands)], + ) + return pod_name + + +def start_build_windows(commit_sha: str, k8s_client): + """Starts a pod to build/test on Windows at the specified 
SHA.""" + pod_name = f"build-{commit_sha}" + bash_commands = [ + "git clone --depth 100 https://github.com/llvm/llvm-project", + "cd llvm-project", + f"git checkout ${commit_sha}", + '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-cir check-clang-tools check-lld check-llvm check-mlir check-polly"', + "echo BUILD FINISHED", + ] + commands = [ + "call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64", + "bash", + "-c", + ";".join(bash_commands), + ] + start_build(k8s_client, pod_name, PLATFORM_TO_NAMESPACE["Windows"], commands) return pod_name @@ -150,6 +182,8 @@ def main(commit_sha: str, platform: str): k8s_client = kubernetes.client.ApiClient() if platform == "Linux": pod_name = start_build_linux(commit_sha, k8s_client) + elif platform == "Windows": + pod_name = start_build_windows(commit_sha, k8s_client) else: raise ValueError("Unrecognized platform.") namespace = PLATFORM_TO_NAMESPACE[platform] From 90e03eb4e33b96f02f93afb1c715b0f8fa8747c6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 6 Aug 2025 16:53:38 -0700 Subject: [PATCH 036/135] [CI] Set POSTCOMMIT_CI variable in pods This ensures that we will be printing the correct output type after https://github.com/llvm/llvm-project/pull/152197 lands. 
Reviewers: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/541 --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 63df72f21..fb9cbd5cb 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -64,6 +64,7 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: f"git checkout ${commit_sha}", "export CC=clang", "export CXX=clang++", + "expot POSTCOMMIT_CI=1", './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' "echo BUILD FINISHED", ] @@ -83,6 +84,7 @@ def start_build_windows(commit_sha: str, k8s_client): "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout ${commit_sha}", + "export POSTCOMMIT_CI=1", '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-cir check-clang-tools check-lld check-llvm check-mlir check-polly"', "echo BUILD FINISHED", ] From 4e99697af55b215c1242d11a06c3d5bd156452bf Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 08:20:14 -0700 Subject: [PATCH 037/135] [CI] Add Terraform Plumbing for Postcommit CI This patch does all the Terraform plumbing necessary for setting up the postcommit buildbots. This leaves out the actual deployments for a future patch. 
Reviewers: Keenuts, lnihlen, gburgessiv, cmtice, dschuff Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/542 --- premerge/gke_cluster/main.tf | 84 ++++++++++++++++++++++++ premerge/main.tf | 27 ++++++++ premerge/premerge_resources/main.tf | 45 +++++++++++++ premerge/premerge_resources/variables.tf | 20 ++++++ 4 files changed, 176 insertions(+) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 958b31302..29a9111b7 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -78,6 +78,40 @@ resource "google_container_node_pool" "llvm_premerge_linux" { } } +# Buildbot here refers specifically to the LLVM Buildbot postcommit +# testing infrastructure. These machines are used specifically for testing +# commits after they have landed in main. +resource "google_container_node_pool" "llvm_buildbot_linux" { + name = "llvm-buildbot-linux" + location = var.region + cluster = google_container_cluster.llvm_premerge.name + initial_node_count = 0 + + autoscaling { + total_min_node_count = 0 + total_max_node_count = 3 + } + + node_config { + machine_type = var.linux_machine_type + taint { + key = "buildbot-platform" + value = "linux" + effect = "NO_SCHEDULE" + } + labels = { + "buildbot-platform" : "linux" + } + disk_size_gb = 200 + + # Enable workload identity federation for this pool so that we can access + # GCS buckets. + workload_metadata_config { + mode = "GKE_METADATA" + } + } +} + resource "google_container_node_pool" "llvm_premerge_libcxx" { name = "llvm-premerge-libcxx" location = var.region @@ -118,6 +152,56 @@ resource "google_container_node_pool" "llvm_premerge_windows_2022" { # a node.kubernetes.io/os taint for windows nodes. 
node_config { machine_type = var.windows_machine_type + labels = { + "buildbot-platform" : "windows-2022" + } + image_type = "WINDOWS_LTSC_CONTAINERD" + windows_node_config { + osversion = "OS_VERSION_LTSC2022" + } + # Add a script that runs on the initial boot to disable Windows Defender. + # Windows Defender causes an increase in test times by approximately an + # order of magnitude. + metadata = { + "sysprep-specialize-script-ps1" = "Set-MpPreference -DisableRealtimeMonitoring $true" + # Terraform wants to recreate the node pool everytime whe running + # terraform apply unless we explicitly set this. + # TODO(boomanaiden154): Look into why terraform is doing this so we do + # not need this hack. + "disable-legacy-endpoints" = "true" + } + disk_size_gb = 200 + disk_type = "pd-ssd" + + # Enable workload identity federation for this pool so that we can access + # GCS buckets. + workload_metadata_config { + mode = "GKE_METADATA" + } + } +} + +# Buildbot here refers specifically to the LLVM Buildbot postcommit +# testing infrastructure. These machines are used specifically for testing +# commits after they have landed in main. +resource "google_container_node_pool" "llvm_buildbot_window_2022" { + name = "llvm-buildbot-windows-2022" + location = var.region + cluster = google_container_cluster.llvm_premerge.name + initial_node_count = 0 + + autoscaling { + total_min_node_count = 0 + total_max_node_count = 3 + } + + # We do not set a taint for the windows nodes as kubernetes by default sets + # a node.kubernetes.io/os taint for windows nodes. + node_config { + # Use the Linux machine type here as we want to keep the windows machines + # symmetric with the Linux machines for faster builds. Throughput is not + # as much of a concern postcommit. 
+ machine_type = var.linux_machine_type labels = { "premerge-platform" : "windows-2022" } diff --git a/premerge/main.tf b/premerge/main.tf index 5e3798785..cde4fa7bb 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -121,6 +121,25 @@ data "google_secret_manager_secret_version" "grafana_token" { secret = "llvm-premerge-testing-grafana-token" } +# Buildbot here refers specifically to the LLVM Buildbot postcommit +# testing infrastructure. These machines are used specifically for testing +# commits after they have landed in main. +data "google_secret_manager_secret_version" "us_central_linux_buildbot_password" { + secret = "llvm-buildbot-linux-us-central" +} + +data "google_secret_manager_secret_version" "us_central_windows_buildbot_password" { + secret = "llvm-buildbot-windows-us-central" +} + +data "google_secret_manager_secret_version" "us_west_linux_buildbot_password" { + secret = "llvm-buildbot-linux-us-west" +} + +data "google_secret_manager_secret_version" "us_west_windows_buildbot_password" { + secret = "llvm-buildbot-windows-us-west" +} + provider "kubernetes" { host = "https://${module.premerge_cluster_us_central.endpoint}" token = data.google_client_config.current.access_token @@ -152,6 +171,10 @@ module "premerge_cluster_us_central_resources" { linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.linux_object_cache_gcp_service_account_email windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_gcp_service_account_email github_arc_version = "0.12.1" + linux_buildbot_name = "premerge-us-central-linux" + linux_buildbot_password = data.google_secret_manager_secret_version.us_central_linux_buildbot_password.secret_data + windows_buildbot_name = "premerge-us-central-windows" + windows_buildbot_password = data.google_secret_manager_secret_version.us_central_windows_buildbot_password.secret_data providers = { kubernetes = kubernetes.llvm-premerge-us-central helm = 
helm.llvm-premerge-us-central @@ -173,6 +196,10 @@ module "premerge_cluster_us_west_resources" { linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.linux_object_cache_gcp_service_account_email windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_gcp_service_account_email github_arc_version = "0.12.1" + linux_buildbot_name = "premerge-us-west-linux" + linux_buildbot_password = data.google_secret_manager_secret_version.us_west_linux_buildbot_password.secret_data + windows_buildbot_name = "premerge-us-west-windows" + windows_buildbot_password = data.google_secret_manager_secret_version.us_west_windows_buildbot_password.secret_data providers = { kubernetes = kubernetes.llvm-premerge-us-west helm = helm.llvm-premerge-us-west diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 546880f35..cf03d8c1e 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -47,6 +47,51 @@ resource "kubernetes_namespace" "llvm_premerge_windows_2022_runners" { } } +# Buildbot here refers specifically to the LLVM Buildbot postcommit +# testing infrastructure. These machines are used specifically for testing +# commits after they have landed in main. 
+resource "kubernetes_namespace" "llvm_premerge_linux_buildbot" { + metadata { + name = "llvm-premerge-linux-buildbot" + } +} + +resource "kubernetes_namespace" "llvm_premerge_windows_2022_buildbot" { + metadata { + name = "llvm-premerge-windows-2022-buildbot" + } +} + +resource "kubernetes_secret" "linux_buildbot_password" { + metadata { + name = "linux-buildbot-password" + namespace = "llvm-premerge-linux-buildbot" + } + + data = { + "password" = var.linux_buildbot_password + } + + type = "Opaque" + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] +} + +resource "kubernetes_secret" "windows_2022_buildbot_password" { + metadata { + name = "windows-buildbot-password" + namespace = "llvm-premerge-windows-buildbot" + } + + data = { + "password" = var.windows_buildbot_password + } + + type = "Opaque" + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] +} + resource "kubernetes_secret" "linux_github_pat" { metadata { name = "github-token" diff --git a/premerge/premerge_resources/variables.tf b/premerge/premerge_resources/variables.tf index 210c711fa..ad69e1e38 100644 --- a/premerge/premerge_resources/variables.tf +++ b/premerge/premerge_resources/variables.tf @@ -104,3 +104,23 @@ variable "windows_2022_object_cache_gcp_service_account_email" { description = "The email associated with the service account for accessing the object cache on Windows." type = string } + +variable "linux_buildbot_name" { + description = "The name of the linux buildbot that will run tests postcommit." + type = string +} + +variable "linux_buildbot_password" { + description = "The password for the linux buildbot that will run tests postcommit." + type = string +} + +variable "windows_buildbot_name" { + description = "The name of the windows buildbot that will run tests postcommit." + type = string +} + +variable "windows_buildbot_password" { + description = "The password for the windows buildbot that will run tests postcommit." 
+ type = string +} From b7a2c1f05bfbe7de5b3f418402fa9e627d26d468 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 15:25:15 +0000 Subject: [PATCH 038/135] [CI] Fix labels I updated some labels so that the windows buildbot nodes would have the windows-2022 label, but apparently updated the wrong ones. This patch fixes that. --- premerge/gke_cluster/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 29a9111b7..5801d9000 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -153,7 +153,7 @@ resource "google_container_node_pool" "llvm_premerge_windows_2022" { node_config { machine_type = var.windows_machine_type labels = { - "buildbot-platform" : "windows-2022" + "premerge-platform" : "windows-2022" } image_type = "WINDOWS_LTSC_CONTAINERD" windows_node_config { @@ -203,7 +203,7 @@ resource "google_container_node_pool" "llvm_buildbot_window_2022" { # as much of a concern postcommit. machine_type = var.linux_machine_type labels = { - "premerge-platform" : "windows-2022" + "buildbot-platform" : "windows-2022" } image_type = "WINDOWS_LTSC_CONTAINERD" windows_node_config { From 32b4f66aa6398fd5a4cddfe2dd4a45fd0307de65 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 15:30:01 +0000 Subject: [PATCH 039/135] [CI] Fix invalid namespace I updated the windows namespace name to be versioned but forgot to update some users. This patch fixes that. 
--- premerge/premerge_resources/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index cf03d8c1e..1a892f848 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -80,7 +80,7 @@ resource "kubernetes_secret" "linux_buildbot_password" { resource "kubernetes_secret" "windows_2022_buildbot_password" { metadata { name = "windows-buildbot-password" - namespace = "llvm-premerge-windows-buildbot" + namespace = "llvm-premerge-windows-2022-buildbot" } data = { From 8ca09672b3df64528943e8f12a93e2a975c8bcbb Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 17:05:43 +0000 Subject: [PATCH 040/135] [CI] Hard Error on Logging Failure This makes dispatch_job.py throw a hard error if we cannot get the logs from a pod to prevent the builder from sitting in an infinite loop. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index fb9cbd5cb..c6486445a 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -205,7 +205,8 @@ def main(commit_sha: str, platform: str): "Cannot yet read logs from the pod: waiting for the container to start." ) else: - logging.warning(f"Failed to get logs from the pod: {log_exception}") + logging.error(f"Failed to get logs from the pod: {log_exception}") + break time.sleep(SECONDS_QUERY_LOGS_EVERY) v1_api.delete_namespaced_pod(pod_name, namespace) From 7d6229e615e52a1e8d3c099f3623b86f51c3ddc8 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 10:07:03 -0700 Subject: [PATCH 041/135] [CI] Add Buildbot Deployments This patch adds the buildbot deployments. 
These are pods designed to run the buildbot worker in the service pool and then spawn jobs onto a node pool inside their own pods. Reviewers: Keenuts, gburgessiv, lnihlen, dschuff, cmtice Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/544 --- premerge/buildbot_deployment.yaml | 34 +++++++++++++++++++ premerge/premerge_resources/main.tf | 13 +++++++ .../annotated/premerge/dispatch_job.py | 5 +-- 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 premerge/buildbot_deployment.yaml diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml new file mode 100644 index 000000000..d8976d94f --- /dev/null +++ b/premerge/buildbot_deployment.yaml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ${ buildbot_name } + namespace: ${ buildbot_namespace } + labels: + app: ${ buildbot_name } +spec: + selector: + matchLabels: + app: ${ buildbot_name } + template: + metadata: + labels: + app: ${ buildbot_name } + spec: + containers: + - name: buildbot + image: ghcr.io/llvm/premerge-buildbot:latest + env: + - name: BUILDBOT_NAME + value: ${ buildbot_name } + - name: BUILDBOT_PASSWORD + valueFrom: + secretKeyRef: + name: ${ secret_name } + key: password + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "1024Mi" + cpu: "750m" diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 1a892f848..48e78b607 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -279,6 +279,19 @@ resource "helm_release" "github_actions_runner_set_libcxx_next" { ] } +resource "kubernetes_manifest" "linux_buildbot_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password" })) + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, 
kubernetes_secret.linux_buildbot_password] +} + +# TODO(boomanaiden154): Enable windows buildbots once Linux is stable. +#resource "kubernetes_manifest" "windows_buildbot_deployment" { +# manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) +# +# depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_buildbot_password] +#} + resource "kubernetes_service_account" "linux_object_cache_ksa" { metadata { name = var.linux_runners_kubernetes_service_account_name diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index c6486445a..233d1927a 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -64,8 +64,9 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: f"git checkout ${commit_sha}", "export CC=clang", "export CXX=clang++", - "expot POSTCOMMIT_CI=1", - './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' + "export POSTCOMMIT_CI=1", + "echo testing maybe succeded.", + #'./.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' "echo BUILD FINISHED", ] start_build( From 1cd24c0e3cda17f64551b2c2a6237406a223b64b Mon Sep 17 00:00:00 2001 
From: Aiden Grossman Date: Fri, 8 Aug 2025 17:37:49 +0000 Subject: [PATCH 042/135] [CI] Misc Buildbot Container Fixes The buildbot container was missing a couple dependencies that were needed to get the worker actually started. The buildbot deployment also had the RAM specified in Mi, but 1024Mi gets turned into 1Gi by k8s which then confuses terraform as it thinks the state is inconsistent. --- premerge/buildbot/Dockerfile | 6 +++++- premerge/buildbot_deployment.yaml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/premerge/buildbot/Dockerfile b/premerge/buildbot/Dockerfile index 103df931c..c295dd6d0 100644 --- a/premerge/buildbot/Dockerfile +++ b/premerge/buildbot/Dockerfile @@ -1,5 +1,9 @@ FROM ubuntu:24.04 -RUN apt-get update && apt-get install -y python3 python3-pip +RUN apt-get update && apt-get install -y \ + python3 \ + python3-pip \ + git \ + python-is-python3 COPY requirements.lock.txt /requirements.lock.txt RUN pip3 install --break-system-packages -r /requirements.lock.txt && rm /requirements.lock.txt RUN mkdir /app diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml index d8976d94f..97e891179 100644 --- a/premerge/buildbot_deployment.yaml +++ b/premerge/buildbot_deployment.yaml @@ -30,5 +30,5 @@ spec: memory: "512Mi" cpu: "500m" limits: - memory: "1024Mi" + memory: "1Gi" cpu: "750m" From ce1f5ca5b85251fc747e59a151d5f534b9712da4 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 18:43:59 +0000 Subject: [PATCH 043/135] [CI] Fix Minor Issues in dispatch_job.py There were a couple issues with dispatch_job.py that needed fixing: 1. BUILDBOT_REVISION/Platform were swapped. 2. k8s does not recognize commands in pod definitions, only command. 3. We need to use the function to load the in cluster config rather than load what kubectl would use. 4. Move the build step start message into the pod. 
--- premerge/buildbot_deployment.yaml | 2 +- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml index 97e891179..9e91776ab 100644 --- a/premerge/buildbot_deployment.yaml +++ b/premerge/buildbot_deployment.yaml @@ -18,7 +18,7 @@ spec: - name: buildbot image: ghcr.io/llvm/premerge-buildbot:latest env: - - name: BUILDBOT_NAME + - name: BUILDBOT_USERNAME value: ${ buildbot_name } - name: BUILDBOT_PASSWORD valueFrom: diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 233d1927a..8cafca686 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -46,7 +46,7 @@ def start_build(k8s_client, pod_name: str, namespace: str, commands: list[str]) { "name": "build", "image": "ghcr.io/llvm/ci-ubuntu-24.04", - "commands": commands, + "command": commands, } ], "restartPolicy": "Never", @@ -59,6 +59,7 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: """Starts a pod to build/test on Linux at the specified SHA.""" pod_name = f"build-{commit_sha}" commands = [ + 'echo "@@@BUILD_STEP Cloning Repository@@@"' "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout ${commit_sha}", @@ -181,7 +182,7 @@ def print_logs( def main(commit_sha: str, platform: str): - kubernetes.config.load_kube_config() + kubernetes.config.load_incluster_config() k8s_client = kubernetes.client.ApiClient() if platform == "Linux": pod_name = start_build_linux(commit_sha, k8s_client) @@ -192,7 +193,6 @@ def main(commit_sha: str, platform: str): namespace = PLATFORM_TO_NAMESPACE[platform] latest_time = datetime.datetime.min v1_api = kubernetes.client.CoreV1Api() - print("@@@BUILD_STEP Build/Test@@@") while True: try: pod_finished, latest_time = 
print_logs( @@ -219,4 +219,4 @@ def main(commit_sha: str, platform: str): if "BUILDBOT_REVISION" not in os.environ: logging.fatal("Expected to have BUILDBOT_REVISION environment variable set.") sys.exit(1) - main(sys.argv[1], os.environ["BUILDBOT_REVISION"]) + main(os.environ["BUILDBOT_REVISION"], sys.argv[1]) From 5739fc7714bbbadf9313d8639a72885212d52034 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 19:01:50 +0000 Subject: [PATCH 044/135] [CI] Add sleep to buildbot startup script buildbot-worker start does start the buildbot worker, but in a background process. This means that the container then immediately exits, also killing the worker. Add a sleep at the end to prevent termination. --- premerge/buildbot/startup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/premerge/buildbot/startup.sh b/premerge/buildbot/startup.sh index 7f39b6463..e7429b633 100644 --- a/premerge/buildbot/startup.sh +++ b/premerge/buildbot/startup.sh @@ -18,3 +18,5 @@ echo "Google LLVM Premerge Infra Rotation " \ } > /worker/info/host buildbot-worker start /worker + +sleep 31536000000 From 5b4a64448dcf0972096bb3dabc3e567c6c072fbf Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 19:20:28 +0000 Subject: [PATCH 045/135] [CI] Add missing comma Or otherwise the strings get concatenated...
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 8cafca686..0e7c6b060 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -59,7 +59,7 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: """Starts a pod to build/test on Linux at the specified SHA.""" pod_name = f"build-{commit_sha}" commands = [ - 'echo "@@@BUILD_STEP Cloning Repository@@@"' + 'echo "@@@BUILD_STEP Cloning Repository@@@"', "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout ${commit_sha}", From 64f2e1bf1e7e8fdfe0c7d2dae0c229d089feba03 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 19:29:39 +0000 Subject: [PATCH 046/135] [CI] Minor fixes in dispatch_job.py 1. Using $ in python f-strings that will get interpreted by bash creates bash variables that then do not map to anything. 2. Move the step annotation back to where it was. Otherwise the timestamps show up before the annotation and it doesn't get interpreted correctly.
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 0e7c6b060..963f45329 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -59,10 +59,9 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: """Starts a pod to build/test on Linux at the specified SHA.""" pod_name = f"build-{commit_sha}" commands = [ - 'echo "@@@BUILD_STEP Cloning Repository@@@"', "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", - f"git checkout ${commit_sha}", + f"git checkout {commit_sha}", "export CC=clang", "export CXX=clang++", "export POSTCOMMIT_CI=1", @@ -193,6 +192,7 @@ def main(commit_sha: str, platform: str): namespace = PLATFORM_TO_NAMESPACE[platform] latest_time = datetime.datetime.min v1_api = kubernetes.client.CoreV1Api() + print("@@@BUILD_STEP Build/Test@@@") while True: try: pod_finished, latest_time = print_logs( From 142f684c1b59ee5f515a1edffae0cb3b54dbfad2 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 19:40:57 +0000 Subject: [PATCH 047/135] [CI] Add Test Build to Premerge Builder This patch makes it so that the premerge linux builder will do a test build. This allows us to ensure things are working without having to run the very expensive full build.
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 963f45329..b9571371a 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -65,7 +65,7 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: "export CC=clang", "export CXX=clang++", "export POSTCOMMIT_CI=1", - "echo testing maybe succeded.", + './.ci/monolithic-linux.sh "polly" "check-polly" "" "" "" OFF', #'./.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' "echo BUILD FINISHED", ] From 6fd7ec08eb47dd8e44a9950df7ffd3f1002175b5 Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Fri, 8 Aug 2025 16:21:04 -0400 Subject: [PATCH 048/135] [CI] Stop counting commit authors as reviewers (#545) When a pull request author comments on their own pull request, it counts as a review and is captured when we query the GraphQL API for reviewers of a pull request. We shouldn't be counting self-reviews when collecting LLVM metrics, so this change removes the author of a commit from the set of reviewers. 
--- llvm-ops-metrics/ops-container/process_llvm_commits.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/llvm-ops-metrics/ops-container/process_llvm_commits.py index 7ee7d788a..6cc3c39d4 100644 --- a/llvm-ops-metrics/ops-container/process_llvm_commits.py +++ b/llvm-ops-metrics/ops-container/process_llvm_commits.py @@ -192,6 +192,10 @@ def query_for_reviews( for review in pull_request["reviews"]["nodes"] ]) + # There are cases where the commit author is counted as a reviewer. This is + # against what we want to measure, so remove them from the set of reviewers. + commit_info.reviewers.discard(commit_info.commit_author) + return list(new_commits.values()) From 0ca4379e35c2121494d35395395a5b4ba5f4d8a3 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 20:22:34 +0000 Subject: [PATCH 049/135] [CI] Run Linux Buildbot Pods on Buildbot Nodes This patch updates dispatch_job.py to actually run the nodes that the buildbot worker spawns on the buildbot nodes. Otherwise they default to the service nodes which only have four cores. Overloading the service nodes could also easily become a problem. 
--- .../builders/annotated/premerge/dispatch_job.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index b9571371a..9fd5999d7 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -42,11 +42,24 @@ def start_build(k8s_client, pod_name: str, namespace: str, commands: list[str]) "namespace": namespace, }, "spec": { + "tolerations": [ + { + "key": "buildbot-platform", + "operator": "Equal", + "value": "linux", + "effect": "NoSchedule", + } + ], + "nodeSelector": {"buildbot-platform": "linux"}, "containers": [ { "name": "build", "image": "ghcr.io/llvm/ci-ubuntu-24.04", "command": commands, + "resources": { + "requests": {"cpu": 55, "memory": "200Gi"}, + "limits": {"cpu": 64, "memory": "256Gi"}, + }, } ], "restartPolicy": "Never", From 8921647aaa0909680f5eaef15edf7a42f0e8afdc Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 8 Aug 2025 20:47:16 +0000 Subject: [PATCH 050/135] [CI] Wire up Buildbot Service Accounts This patch wires up service accounts for the buildbot worker pods. This is necessary because otherwise we cannot access the k8s API, which the worker needs to be able to do to start builds/get logs. 
--- premerge/buildbot_deployment.yaml | 1 + premerge/premerge_resources/main.tf | 90 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml index 9e91776ab..ffb914c02 100644 --- a/premerge/buildbot_deployment.yaml +++ b/premerge/buildbot_deployment.yaml @@ -14,6 +14,7 @@ spec: labels: app: ${ buildbot_name } spec: + serviceAccountName: buildbot-ksa containers: - name: buildbot image: ghcr.io/llvm/premerge-buildbot:latest diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 48e78b607..13a456471 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -279,12 +279,102 @@ resource "helm_release" "github_actions_runner_set_libcxx_next" { ] } +resource "kubernetes_role" "linux_buildbot_role" { + metadata { + name = "buildbot-role" + namespace = "llvm-premerge-linux-buildbot" + } + + rule { + api_groups = [""] + resources = ["pods", "pods/log"] + verbs = ["create", "delete", "get"] + } + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] +} + +resource "kubernetes_service_account" "linux_buildbot_ksa" { + metadata { + name = "buildbot-ksa" + namespace = "llvm-premerge-linux-buildbot" + } + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] +} + +resource "kubernetes_role_binding" "linux_buildbot_role_binding" { + metadata { + name = "buildbot-role-binding" + namespace = "llvm-premerge-linux-buildbot" + } + + role_ref { + kind = "Role" + name = "buildbot-role" + api_group = "rbac.authorization.k8s.io" + } + + subject { + kind = "ServiceAccount" + name = "buildbot-ksa" + namespace = "llvm-premerge-linux-buildbot" + } + + depends_on = [kubernetes_role.linux_buildbot_role, kubernetes_service_account.linux_buildbot_ksa] +} + resource "kubernetes_manifest" "linux_buildbot_deployment" { manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : 
var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password" })) depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] } +resource "kubernetes_role" "windows_2022_buildbot_role" { + metadata { + name = "buildbot-role" + namespace = "llvm-premerge-windows-2022-buildbot" + } + + rule { + api_groups = [""] + resources = ["pods", "pods/log"] + verbs = ["create", "delete", "get"] + } + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] +} + +resource "kubernetes_service_account" "windows_2022_buildbot_ksa" { + metadata { + name = "buildbot-ksa" + namespace = "llvm-premerge-windows-2022-buildbot" + } + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] +} + +resource "kubernetes_role_binding" "windows_2022_buildbot_role_binding" { + metadata { + name = "buildbot-role-binding" + namespace = "llvm-premerge-windows-2022-buildbot" + } + + role_ref { + kind = "Role" + name = "buildbot-role" + api_group = "rbac.authorization.k8s.io" + } + + subject { + kind = "ServiceAccount" + name = "buildbot-service-account" + namespace = "llvm-premerge-windows-2022-buildbot" + } + + depends_on = [kubernetes_role.windows_2022_buildbot_role, kubernetes_service_account.windows_2022_buildbot_ksa] +} + # TODO(boomanaiden154): Enable windows buildbots once Linux is stable. #resource "kubernetes_manifest" "windows_buildbot_deployment" { # manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) From 0d675b53fedc6454611a0db78a47c8e9a394d58f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 04:52:17 +0000 Subject: [PATCH 051/135] [CI] Get dispatch_job.py Working for Windows dispatch_job.py had some issues working on Windows, namely around the scripts not actually working. 
This patch fixes that. --- .../annotated/premerge/dispatch_job.py | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 9fd5999d7..5b0473705 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -19,43 +19,59 @@ PLATFORM_TO_NAMESPACE = { "Linux": "llvm-premerge-linux-buildbot", - "Windows": "llvm-premerge-windows-buildbot", + "Windows": "llvm-premerge-windows-2022-buildbot", +} +PLATFORM_TAINT = { + "Linux": ("buildbot-platform", "linux"), + "Windows": ("node.kubernetes.io/os", "windows"), +} +PLATFORM_TO_BUILDBOT_PLATFORM = {"Linux": "linux", "Windows": "windows-2022"} +PLATFORM_CONTAINER = { + "Linux": "ghcr.io/llvm/ci-ubuntu-24.04", + "Windows": "ghcr.io/llvm/ci-windows-2022", } LOG_SECONDS_TO_QUERY = 10 SECONDS_QUERY_LOGS_EVERY = 5 -def start_build(k8s_client, pod_name: str, namespace: str, commands: list[str]) -> None: +def start_build( + k8s_client, pod_name: str, platform: str, commands: list[str], args: list[str] +) -> None: """Spawns a pod to run the specified commands. Args: k8s_client: The kubernetes client instance to use for spawning the pod. pod_name: The name of the pod to start. - namespace: The namespace to launch the pod in. + platform: The platform to launch the pod on. commands: The commands to run upon pod start. + args: Arguments to pass to the command upon pod start. 
""" + taint_key, taint_value = PLATFORM_TAINT[platform] pod_definition = { "apiVersion": "v1", "kind": "Pod", "metadata": { "name": pod_name, - "namespace": namespace, + "namespace": PLATFORM_TO_NAMESPACE[platform], }, "spec": { "tolerations": [ { - "key": "buildbot-platform", + "key": taint_key, "operator": "Equal", - "value": "linux", + "value": taint_value, "effect": "NoSchedule", } ], - "nodeSelector": {"buildbot-platform": "linux"}, + "nodeSelector": { + "buildbot-platform": PLATFORM_TO_BUILDBOT_PLATFORM[platform] + }, "containers": [ { "name": "build", - "image": "ghcr.io/llvm/ci-ubuntu-24.04", + "image": PLATFORM_CONTAINER[platform], "command": commands, + "args": args, "resources": { "requests": {"cpu": 55, "memory": "200Gi"}, "limits": {"cpu": 64, "memory": "256Gi"}, @@ -83,10 +99,7 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: "echo BUILD FINISHED", ] start_build( - k8s_client, - pod_name, - PLATFORM_TO_NAMESPACE["Linux"], - ["/bin/bash", "-c", ";".join(commands)], + k8s_client, pod_name, "Linux", ["/bin/bash", "-c", ";".join(commands)], [] ) return pod_name @@ -97,18 +110,19 @@ def start_build_windows(commit_sha: str, k8s_client): bash_commands = [ "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", - f"git checkout ${commit_sha}", + f"git checkout {commit_sha}", "export POSTCOMMIT_CI=1", - '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-cir check-clang-tools check-lld check-llvm check-mlir check-polly"', + ".ci/monolithic-windows.sh 'polly;mlir' 'check-polly check-mlir'", + #'.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-cir check-clang-tools check-lld check-llvm check-mlir check-polly"', "echo BUILD FINISHED", ] + bash_command = f"bash -c \"{';'.join(bash_commands)}\"\"" commands = [ "call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64", - "bash", - "-c", - 
";".join(bash_commands), + bash_command, ] - start_build(k8s_client, pod_name, PLATFORM_TO_NAMESPACE["Windows"], commands) + args = ["/c " + " && ".join(commands)] + start_build(k8s_client, pod_name, "Windows", ["cmd.exe"], args) return pod_name From 1b3a0a7cfa1f1133e7dff7fc49bb508029fb13c6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 10 Aug 2025 22:00:10 +0000 Subject: [PATCH 052/135] [CI] Fix Buildbot Service Account Name The buildbot service account name was incorrect for the windows role binding. --- premerge/premerge_resources/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 13a456471..eec4c8414 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -368,7 +368,7 @@ resource "kubernetes_role_binding" "windows_2022_buildbot_role_binding" { subject { kind = "ServiceAccount" - name = "buildbot-service-account" + name = "buildbot-ksa" namespace = "llvm-premerge-windows-2022-buildbot" } From 7bfb946fd7d1bbaae27f7ac8bd210503cb686dff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Mon, 11 Aug 2025 17:16:48 +0200 Subject: [PATCH 053/135] [CI] Update documentation about token rotation (#539) Adding documentation about usage of Grafana Cloud tokens in the infra, and how to rotate them if required. --- premerge/cluster-management.md | 100 +++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/premerge/cluster-management.md b/premerge/cluster-management.md index 3caec1e2a..2764b4577 100644 --- a/premerge/cluster-management.md +++ b/premerge/cluster-management.md @@ -237,3 +237,103 @@ ensure they are in a state consistent with the terraform IaC definitions. [Strategies for Upgrading ARC](https://www.kenmuse.com/blog/strategies-for-upgrading-arc/) outlines how ARC should be upgraded and why. 
+ +## Grafana tokens + +The cluster has multiple services communicating with Grafana Cloud: + - the metrics container + - per-node monitoring (Grafana Alloy, Prometheus node exporter) + - per-cluster monitoring (Opencost, Alloy) + +The full description of the services can be found on the [k8s-monitoring Helm +chart repository](https://github.com/grafana/k8s-monitoring-helm). + +Authentication to Grafana Cloud is handled through `Cloud access policies`. +Currently, the cluster uses 2 kinds of tokens: + + - `llvm-premerge-metrics-grafana-api-key` + Used by: metrics container + Scopes: `metrics:write` + + - `llvm-premerge-grafana-token` + Used by: Alloy, Prometheus node exporter & other services. + Scopes: `metrics:read`, `metrics:write`, `logs:write` + +We've set up 2 cloud policies with matching names so scopes are already set up. +If you need to rotate tokens, you need to: + + 1. Log in to Grafana Cloud + 2. Navigate to `Home > Administration > Users and Access > Cloud Access Policies` + 3. Create a new token in the desired cloud access policy. + 4. Log in to `GCP > Security > Secret Manager` + 5. Click on the secret to update. + 6. Click on `New version` + 7. Paste the token displayed in Grafana and tick `Disable all past versions`. + +At this stage, you should have a **single** enabled secret on GCP. If you +display the value, you should see the Grafana token. + +Then, go to the `llvm-zorg` repository. Make sure you pulled the latest changes +in `main`, and then as usual, run `terraform apply`. + +At this stage, you made sure newly created services will use the token, but +existing deployments still rely on the old tokens. You need to manually restart +the deployments on both `us-west1` and `us-central1-a` clusters. 
+ +Run: + +``` bash +gcloud container clusters get-credentials llvm-premerge-cluster-us-west --location us-west1 +kubectl scale --replicas=0 --namespace grafana deployments \ + grafana-k8s-monitoring-opencost \ + grafana-k8s-monitoring-kube-state-metrics \ + grafana-k8s-monitoring-alloy-events + +gcloud container clusters get-credentials llvm-premerge-cluster-us-central --location us-central1-a +kubectl scale --replicas=0 --namespace grafana deployments \ + grafana-k8s-monitoring-opencost \ + grafana-k8s-monitoring-kube-state-metrics \ + grafana-k8s-monitoring-alloy-events +kubectl scale --replicas=0 --namespace metrics deployment/metrics +``` + +:warning: metrics namespace only exists in the `us-central1-a` cluster. + +Wait until the command `kubectl get deployments --namespace grafana` shows +all deployments have been scaled down to zero. Then run: + +```bash +gcloud container clusters get-credentials llvm-premerge-cluster-us-west --location us-west1 +kubectl scale --replicas=1 --namespace grafana deployments \ + grafana-k8s-monitoring-opencost \ + grafana-k8s-monitoring-kube-state-metrics \ + grafana-k8s-monitoring-alloy-events + +gcloud container clusters get-credentials llvm-premerge-cluster-us-central --location us-central1-a +kubectl scale --replicas=1 --namespace grafana deployments \ + grafana-k8s-monitoring-opencost \ + grafana-k8s-monitoring-kube-state-metrics \ + grafana-k8s-monitoring-alloy-events +kubectl scale --replicas=1 --namespace metrics deployment/metrics +``` + +You can check the restarted service logs for errors. If the token is invalid +or the scope bad, you should see some `401` error codes. + +```bash +kubectl logs -n metrics deployment/metrics +kubectl logs -n grafana deployment/grafana-k8s-monitoring-opencost +``` + +At this stage, all long-lived services should be using the new tokens. +**DO NOT DELETE THE OLD TOKEN YET**. +The existing CI jobs can be quite long-lived. We need to wait for them to +finish. New CI jobs will pick up the new tokens. 
+ +After 24 hours, log back in to +`Administration > Users and Access > Cloud Access Policies` and expand the +token lists. +You should see the new tokens' `Last used at` being about a dozen minutes ago at +most, while old tokens should remain unused for several hours. +If this is the case, congratulations, you've successfully rotated security +tokens! You can now safely delete the old unused tokens. From f503ff48eb2b3b68cedde51cea9adc4002df2e52 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 16:09:14 +0000 Subject: [PATCH 054/135] [CI] Wait for Pod to Schedule before Requesting Logs Otherwise we run into 500 errors if the pod takes too long to schedule. --- premerge/premerge_resources/main.tf | 4 ++-- .../builders/annotated/premerge/dispatch_job.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index eec4c8414..9ff654480 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -287,7 +287,7 @@ resource "kubernetes_role" "linux_buildbot_role" { rule { api_groups = [""] - resources = ["pods", "pods/log"] + resources = ["pods", "pods/log", "pods/status"] verbs = ["create", "delete", "get"] } @@ -338,7 +338,7 @@ resource "kubernetes_role" "windows_2022_buildbot_role" { rule { api_groups = [""] - resources = ["pods", "pods/log"] + resources = ["pods", "pods/log", "pods/status"] verbs = ["create", "delete", "get"] } diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 5b0473705..a94b7a93b 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -149,6 +149,13 @@ def read_logs(pod_name: str, namespace: str, v1_api) -> list[str]: return logs.split("\n")[:-1] +def get_pod_status(pod_name: str, namespace: str, v1_api) -> str: + """Gets the status of a 
pod.""" + return v1_api.read_namespaced_pod_status( + name=pod_name, namespace=namespace + ).status.phase + + def get_logs_to_print( logs: list[str], latest_time: datetime.datetime ) -> tuple[datetime.datetime, list[str]]: @@ -220,6 +227,11 @@ def main(commit_sha: str, platform: str): latest_time = datetime.datetime.min v1_api = kubernetes.client.CoreV1Api() print("@@@BUILD_STEP Build/Test@@@") + pod_status = "Pending" + while pod_status == "Pending": + print("Waiting for the pod to schedule onto a machine.") + time.sleep(SECONDS_QUERY_LOGS_EVERY) + pod_status = get_pod_status(pod_name, namespace, v1_api) while True: try: pod_finished, latest_time = print_logs( From 633706495bb1c8f15e21c0cbc1287a8e22124b83 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 20:40:38 +0000 Subject: [PATCH 055/135] [CI] Enable Premerge Windows Buildbots Now that the scripts have been verified as working on Windows, we can enable the worker deployments for Windows. --- premerge/premerge_resources/main.tf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 9ff654480..b6964f6cb 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -375,12 +375,11 @@ resource "kubernetes_role_binding" "windows_2022_buildbot_role_binding" { depends_on = [kubernetes_role.windows_2022_buildbot_role, kubernetes_service_account.windows_2022_buildbot_ksa] } -# TODO(boomanaiden154): Enable windows buildbots once Linux is stable. 
-#resource "kubernetes_manifest" "windows_buildbot_deployment" { -# manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) -# -# depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_buildbot_password] -#} +resource "kubernetes_manifest" "windows_buildbot_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] +} resource "kubernetes_service_account" "linux_object_cache_ksa" { metadata { From 9f2f5c208b2973e95d0beb52ca4cd13bef93ca53 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 13:49:14 -0700 Subject: [PATCH 056/135] [CI] Double RAM in Service Pool (#546) The operational metrics CronJob was having trouble scheduling due to memory pressure. Change from highcpu instances to standard instances which increases the amount of RAM from 4GB to 16GB per instance. This costs slightly more, but the difference is negligible. If we end up being that concerned, we can use custom sized instances and move to only 8GB of RAM, but for now this should be fine. 
--- premerge/gke_cluster/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 5801d9000..8eecfb76e 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -39,7 +39,7 @@ resource "google_container_node_pool" "llvm_premerge_linux_service" { node_locations = var.service_node_pool_locations node_config { - machine_type = "e2-highcpu-4" + machine_type = "e2-standard-4" workload_metadata_config { mode = "GKE_METADATA" From 1ddfbcf05ba4867401e59ffbb35e6315c33b8756 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Mon, 11 Aug 2025 13:56:04 -0700 Subject: [PATCH 057/135] Make some edits to the libc windows build config (#543) Update the configs for Windows build. This adds LLVM_ENABLE_RUNTIMES, removes some unused configs, and changes the directory path from llvm-project/llvm to llvm-project/runtimes --- zorg/buildbot/builders/annotated/libc-windows.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/zorg/buildbot/builders/annotated/libc-windows.py b/zorg/buildbot/builders/annotated/libc-windows.py index 5d83348c5..7f7bf4aa5 100644 --- a/zorg/buildbot/builders/annotated/libc-windows.py +++ b/zorg/buildbot/builders/annotated/libc-windows.py @@ -39,17 +39,12 @@ def main(argv): if args.asan: cmake_args.append('-DLLVM_USE_SANITIZER=Address') - cmake_args.append('-DLLVM_ENABLE_PROJECTS=libc') - cmake_args.append('-DLLVM_TARGETS_TO_BUILD=X86') - cmake_args.append('-DLLVM_FORCE_BUILD_RUNTIME=libc') - cmake_args.append('-DLLVM_NATIVE_ARCH=x86_64') - cmake_args.append('-DLLVM_HOST_TRIPLE=x86_64-window-x86-gnu') - cmake_args.append('-DLLVM_LIBC_MPFR_INSTALL_PATH=C:/src/install') + cmake_args.append('-DLLVM_ENABLE_RUNTIMES=libc') - run_command(['cmake', os.path.join(source_dir, 'llvm')] + cmake_args) + run_command(['cmake', os.path.join(source_dir, 'runtimes')] + cmake_args) with step('build llvmlibc', halt_on_fail=True): - run_command(['ninja', 
'llvmlibc']) + run_command(['ninja', 'libc']) with step('check-libc'): run_command(['ninja', 'check-libc']) From a83f4b3d2935658bc2ada26cfefa009d95dbfcd9 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 21:22:59 +0000 Subject: [PATCH 058/135] [CI] Make dispatch_job.py fail if build failed Otherwise the job always gets marked as succeeding on the buildbot side. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index a94b7a93b..0d3fa13cb 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -248,7 +248,14 @@ def main(commit_sha: str, platform: str): logging.error(f"Failed to get logs from the pod: {log_exception}") break time.sleep(SECONDS_QUERY_LOGS_EVERY) + pod_status = get_pod_status(pod_name, namespace, v1_api) v1_api.delete_namespaced_pod(pod_name, namespace) + if pod_status == "Succeeded": + print("Job Succeeded.") + sys.exit(0) + else: + print("Job Failed.") + sys.exit(1) if __name__ == "__main__": From a19ef3cfefe15e16c802615058bca666d8ca2d30 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 14:59:19 -0700 Subject: [PATCH 059/135] [CI] Bump libcxx next runner image to 77cb09 (#547) After https://github.com/llvm/llvm-project/pull/153035 we have a new runner image that should theoretically work. Set the next runner set to use it so we can start testing. 
--- premerge/premerge_resources/variables.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/premerge/premerge_resources/variables.tf b/premerge/premerge_resources/variables.tf index ad69e1e38..a6a3e12e6 100644 --- a/premerge/premerge_resources/variables.tf +++ b/premerge/premerge_resources/variables.tf @@ -69,10 +69,9 @@ variable "libcxx_release_runner_image" { default = "ghcr.io/llvm/libcxx-linux-builder:16f046281bf1a11d344eac1bc44d11f3e50e3b5d" } -# Same value as libcxx_runner_image at this time. variable "libcxx_next_runner_image" { type = string - default = "ghcr.io/llvm/libcxx-linux-builder:16f046281bf1a11d344eac1bc44d11f3e50e3b5d" + default = "ghcr.io/llvm/libcxx-linux-builder:77cb0980bcc2675b27d08141526939423fa0be76" } variable "linux_runners_namespace_name" { From bc34e5bfd8a1b7f6cd760398eddfe618a7971a7b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 22:07:32 +0000 Subject: [PATCH 060/135] [CI] Wait for Pod to Complete Before Evaluating Status Otherwise the state might still be running when we grab the status which makes the script think the job failed. 
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 0d3fa13cb..5ff6a8e71 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -249,6 +249,10 @@ def main(commit_sha: str, platform: str): break time.sleep(SECONDS_QUERY_LOGS_EVERY) pod_status = get_pod_status(pod_name, namespace, v1_api) + while pod_status == "Running": + print("Waiting for pod to complete.") + time.sleep(SECONDS_QUERY_LOGS_EVERY) + pod_status = get_pod_status(pod_name, namespace, v1_api) v1_api.delete_namespaced_pod(pod_name, namespace) if pod_status == "Succeeded": print("Job Succeeded.") From 0d1f52efe8991bd7b9c776a97769ab1b1644f9ea Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 22:34:33 +0000 Subject: [PATCH 061/135] [CI] Customize Path for libcxx runner sets The Github runner path has changed to what we use for the Linux containers in the new runner images. For now, customize the command per runner set to work around this. Add a TODO to remove this once we have standardized on a single path. 
--- premerge/libcxx_runners_values.yaml | 2 +- premerge/premerge_resources/main.tf | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/premerge/libcxx_runners_values.yaml b/premerge/libcxx_runners_values.yaml index 83585826e..ab41ddaab 100644 --- a/premerge/libcxx_runners_values.yaml +++ b/premerge/libcxx_runners_values.yaml @@ -21,7 +21,7 @@ template: containers: - name: runner image: ${ runner_image } - command: ["/home/runner/run.sh"] + command: ["${ command }"] resources: # If we don't set the CPU request high-enough here, 2 runners might # be scheduled on the same pod, meaning 2 jobs, and they will starve diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index b6964f6cb..db48e2d34 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -225,6 +225,11 @@ resource "helm_release" "github_actions_runner_set_windows_2022" { ] } +# TODO(boomanaiden154): We have to customize the command for the libcxx runner +# containers because the file path has changed between the sets. Remove this +# workaround once all of the runner sets have the runner binary in the same +# path. 
+ resource "helm_release" "github_actions_runner_set_libcxx" { name = "llvm-premerge-libcxx-runners" namespace = "llvm-premerge-libcxx-runners" @@ -233,7 +238,7 @@ resource "helm_release" "github_actions_runner_set_libcxx" { chart = "gha-runner-scale-set" values = [ - "${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_runner_image })}" + "${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_runner_image, command : "/home/runner/run.sh" })}" ] depends_on = [ @@ -251,7 +256,7 @@ resource "helm_release" "github_actions_runner_set_libcxx_release" { chart = "gha-runner-scale-set" values = [ - "${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_release_runner_image })}" + "${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_release_runner_image, command : "/home/runner/run.sh" })}" ] depends_on = [ @@ -269,7 +274,7 @@ resource "helm_release" "github_actions_runner_set_libcxx_next" { chart = "gha-runner-scale-set" values = [ - "${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_next_runner_image })}" + "${templatefile("libcxx_runners_values.yaml", { runner_group_name : var.runner_group_name, runner_image : var.libcxx_next_runner_image, command : "/home/gha/actions-runner/run.sh" })}" ] depends_on = [ From 0cadb92efe46483a85acd93a08275a5d5b0abdac Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 11 Aug 2025 15:37:25 -0700 Subject: [PATCH 062/135] [CI] Bump Object Age at Deletion to 7 Days (#548) Currently we delete cached build artifacts after one day. This patch bumps that to seven days. We are using very little storage currently (15-20GB) per bucket, so bumping this should have a negligible impact on cost. 
This also allows for builds that are not run as often (like those on the release branch) to have a warm cache when they are run again, assuming they are run at a reasonable cadence. --- premerge/gke_cluster/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 8eecfb76e..ff86e30ee 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -247,7 +247,7 @@ resource "google_storage_bucket" "object_cache_linux" { type = "Delete" } condition { - age = 1 + age = 7 } } } @@ -268,7 +268,7 @@ resource "google_storage_bucket" "object_cache_windows" { type = "Delete" } condition { - age = 1 + age = 7 } } } From 82405a5c30b4b6286c9446844c3aee3d808f7299 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 13 Aug 2025 12:57:42 -0700 Subject: [PATCH 063/135] [CI] Bump libc++ container version (#549) https://github.com/llvm/llvm-project/commit/4fd41c4afbc76ead0c46e80990f616d21dd983f6 updated the runner version in the container. Bump to that to take advantage of the runner version update. Bump both the next runner set and the existing runner set given we are currently running all jobs on the next runner set. This is safe to bump given this was a runner only version bump. 
--- premerge/premerge_resources/variables.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/premerge/premerge_resources/variables.tf b/premerge/premerge_resources/variables.tf index a6a3e12e6..56baf207f 100644 --- a/premerge/premerge_resources/variables.tf +++ b/premerge/premerge_resources/variables.tf @@ -61,7 +61,7 @@ variable "runner_group_name" { variable "libcxx_runner_image" { type = string - default = "ghcr.io/llvm/libcxx-linux-builder:16f046281bf1a11d344eac1bc44d11f3e50e3b5d" + default = "ghcr.io/llvm/libcxx-linux-builder:36d31b0c008b2716329b5c9990f583decf919819" } variable "libcxx_release_runner_image" { @@ -71,7 +71,7 @@ variable "libcxx_release_runner_image" { variable "libcxx_next_runner_image" { type = string - default = "ghcr.io/llvm/libcxx-linux-builder:77cb0980bcc2675b27d08141526939423fa0be76" + default = "ghcr.io/llvm/libcxx-linux-builder:36d31b0c008b2716329b5c9990f583decf919819" } variable "linux_runners_namespace_name" { From a0663d86d8f1c0af53da311d82d146047d9f29ee Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Thu, 14 Aug 2025 16:45:55 +0200 Subject: [PATCH 064/135] [HIP] Prefer ccache over incremental build (#550) It slipped my attention that this builder was (still?) doing incremental builds. This is highly not recommended by the community. This patch nukes an existing build dir completely to start from a clean state and rely on ccache to bring build-time improvements. 
--- zorg/buildbot/builders/annotated/hip-build.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/hip-build.sh b/zorg/buildbot/builders/annotated/hip-build.sh index e699fa18c..83b4b095e 100755 --- a/zorg/buildbot/builders/annotated/hip-build.sh +++ b/zorg/buildbot/builders/annotated/hip-build.sh @@ -71,6 +71,12 @@ git -C "${TESTSUITE_ROOT}" reset --hard origin/main # Start building LLVM, Clang, Lld, clang-tools-extra, compiler-rt build_llvm() { build_step "Configure LLVM Build" + +# Nuke the build dir to start from a cleaner state and rely on ccache for build time +if [ -d "${LLVM_BUILD_DIR}" ]; then + rm -rf "${LLVM_BUILD_DIR}" +fi + mkdir -p "${LLVM_BUILD_DIR}" cd "${LLVM_BUILD_DIR}" cmake -G Ninja \ @@ -136,4 +142,3 @@ update_test_suite build_test_suite exit 0 - From a1ee6637e5f693f13d550a47694b940b1a89c650 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Fri, 15 Aug 2025 21:44:46 +0100 Subject: [PATCH 065/135] [RISCV] Add rva23u64_zvl1024b to test matrix for the gauntlet buildbot And remove the EVL configuration now this is the default. --- .../builders/annotated/rise-riscv-gauntlet-build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh b/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh index 710a9d000..3f8b09f34 100755 --- a/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh +++ b/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh @@ -81,7 +81,7 @@ set +e # Skip a few tests that have excessive runtimes relative to the others. 
export LIT_FILTER_OUT='(SingleSource/Benchmarks/Polybench/linear-algebra/solvers/(ludcmp|lu)|MicroBenchmarks/LoopVectorization/LoopInterleavingBenchmarks)' -for CONF in rva20 rva22 rva23 rva23-evl rva23-mrvv-vec-bits; do +for CONF in rva20 rva22 rva23 rva23-zvl1024b rva23-mrvv-vec-bits; do RVA23_QEMU_CPU="rv64,zba=true,zbb=true,zbc=false,zbs=true,zfhmin=true,v=true,vext_spec=v1.0,zkt=true,zvfhmin=true,zvbb=true,zvkt=true,zihintntl=true,zicond=true,zimop=true,zcmop=true,zcb=true,zfa=true,zawrs=true,rvv_ta_all_1s=true,rvv_ma_all_1s=true,rvv_vl_half_avl=true" case "$CONF" in rva20) @@ -96,8 +96,8 @@ for CONF in rva20 rva22 rva23 rva23-evl rva23-mrvv-vec-bits; do CFLAGS="-march=rva23u64" QEMU_CPU=$RVA23_QEMU_CPU ;; - rva23-evl) - CFLAGS="-march=rva23u64 -mllvm -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue" + rva23-zvl1024b) + CFLAGS="-march=rva23u64_zvl1024b" QEMU_CPU=$RVA23_QEMU_CPU ;; rva23-mrvv-vec-bits) From 3d241398c7fbd179b2a828f50db23df0fdfa1576 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Fri, 15 Aug 2025 22:22:11 +0100 Subject: [PATCH 066/135] [RISCV] Add missing vlen=1024 to qemu invocation for zvl1024b riscv-gauntlet config --- zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh b/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh index 3f8b09f34..29103305a 100755 --- a/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh +++ b/zorg/buildbot/builders/annotated/rise-riscv-gauntlet-build.sh @@ -98,7 +98,7 @@ for CONF in rva20 rva22 rva23 rva23-zvl1024b rva23-mrvv-vec-bits; do ;; rva23-zvl1024b) CFLAGS="-march=rva23u64_zvl1024b" - QEMU_CPU=$RVA23_QEMU_CPU + QEMU_CPU="$RVA23_QEMU_CPU,vlen=1024" ;; rva23-mrvv-vec-bits) CFLAGS="-march=rva23u64 -mrvv-vector-bits=zvl" From e354528faa1ba4cf6b8684c6309704eaae7f8ed1 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 
14:32:40 -0700 Subject: [PATCH 067/135] [CI] Terraform Plumbing for GCS Caching in Buildbot Add all of the service accounts/IAM bindings needed for GCS caching in the buildbot namespaces. Use separate service accounts in case we want to restrict the permissions between the PR checks and postcommit buildbots differently. Reviewers: cmtice, dschuff, gburgessiv, Keenuts Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/552 --- premerge/buildbot_deployment.yaml | 2 + premerge/gke_cluster/main.tf | 65 +++++++++++++++++++++++++++++ premerge/premerge_resources/main.tf | 22 +++++++++- 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml index ffb914c02..403f22e01 100644 --- a/premerge/buildbot_deployment.yaml +++ b/premerge/buildbot_deployment.yaml @@ -26,6 +26,8 @@ spec: secretKeyRef: name: ${ secret_name } key: password + - name: BUILDBOT_REGION + value: ${ buildbot_region } resources: requests: memory: "512Mi" diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index ff86e30ee..8a19d273e 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -283,6 +283,9 @@ resource "google_service_account" "object_cache_windows_gsa" { display_name = format("%s Windows Object Cache Service Account", var.region) } +# TODO(boomanaiden154): Restrict the permissions of the two IAM bindings +# below so that the PR runs can only read from the cache to help prevent +# cache poisoning. 
resource "google_storage_bucket_iam_binding" "linux_bucket_binding" { bucket = google_storage_bucket.object_cache_linux.name role = "roles/storage.objectUser" @@ -334,3 +337,65 @@ resource "google_service_account_iam_binding" "windows_bucket_gsa_workload_bindi google_service_account.object_cache_windows_gsa, ] } + +resource "google_service_account" "object_cache_linux_buildbot_gsa" { + account_id = format("%s-linux-buildbot", var.gcs_bucket_location) + display_name = format("%s Linux Object Cache Buildbot Service Account", var.region) +} + +resource "google_service_account" "object_cache_windows_buildbot_gsa" { + account_id = format("%s-windows-buildbot", var.gcs_bucket_location) + display_name = format("%s Windows Object Cache Buildbot Service Account", var.region) +} + +resource "google_storage_bucket_iam_binding" "linux_bucket_buildbot_binding" { + bucket = google_storage_bucket.object_cache_linux.name + role = "roles/storage.objectUser" + members = [ + format("serviceAccount:%s", google_service_account.object_cache_linux_buildbot_gsa.email), + ] + + depends_on = [ + google_storage_bucket.object_cache_linux, + google_service_account.object_cache_linux_buildbot_gsa, + ] +} + +resource "google_storage_bucket_iam_binding" "windows_bucket_buildbot_binding" { + bucket = google_storage_bucket.object_cache_windows.name + role = "roles/storage.objectUser" + members = [ + format("serviceAccount:%s", google_service_account.object_cache_windows_buildbot_gsa.email), + ] + + depends_on = [ + google_storage_bucket.object_cache_windows, + google_service_account.object_cache_windows_buildbot_gsa + ] +} + +resource "google_service_account_iam_binding" "linux_bucket_buildbot_gsa_workload_binding" { + service_account_id = google_service_account.object_cache_linux_buildbot_gsa.name + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${google_service_account.object_cache_linux_buildbot_gsa.project}.svc.id.goog[llvm-premerge-linux-buildbot/buildbot-gcs-ksa]", 
+ ] + + depends_on = [ + google_service_account.object_cache_linux_buildbot_gsa, + ] +} + +resource "google_service_account_iam_binding" "windows_bucket_buildbot_gsa_workload_binding" { + service_account_id = google_service_account.object_cache_windows_buildbot_gsa.name + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${google_service_account.object_cache_windows_buildbot_gsa.project}.svc.id.goog[llvm-premerge-windows-2022-buildbot/buildbot-gcs-ksa]", + ] + + depends_on = [ + google_service_account.object_cache_windows_buildbot_gsa, + ] +} diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index db48e2d34..222f550d9 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -329,8 +329,17 @@ resource "kubernetes_role_binding" "linux_buildbot_role_binding" { depends_on = [kubernetes_role.linux_buildbot_role, kubernetes_service_account.linux_buildbot_ksa] } +resource "kubernetes_service_account" "linux_buildbot_gcs_ksa" { + metadata { + name = "buildbot-gcs-ksa" + namespace = "llvm-premerge-linux-buildbot" + } + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] +} + resource "kubernetes_manifest" "linux_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password" })) + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] } @@ -380,8 +389,17 @@ resource "kubernetes_role_binding" "windows_2022_buildbot_role_binding" { depends_on = [kubernetes_role.windows_2022_buildbot_role, 
kubernetes_service_account.windows_2022_buildbot_ksa] } +resource "kubernetes_service_account" "windows_2022_buildbot_gcs_ksa" { + metadata { + name = "buildbot-gcs-ksa" + namespace = "llvm-premerge-windows-2022-buildbot" + } + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] +} + resource "kubernetes_manifest" "windows_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : var.cluster_name })) depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] } From b01f6e0dec35cccb5de1d9a04c7bd17301a44805 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 14:44:28 -0700 Subject: [PATCH 068/135] [CI] Update dispatch_job.py to setup sccache Now that we have buckets available and plumbed through terraform, set up dispatch_job.py to actually set up sccache so the build scripts will use the remote cache. 
Reviewers: Keenuts, gburgessiv, dschuff, cmtice Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-zorg/pull/554 --- .../annotated/premerge/dispatch_job.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 5ff6a8e71..1e71ee2ba 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -30,6 +30,10 @@ "Linux": "ghcr.io/llvm/ci-ubuntu-24.04", "Windows": "ghcr.io/llvm/ci-windows-2022", } +PLATFORM_TO_GCS_BUCKET_SUFFIX = { + "Linux": "-object-cache-linux", + "Windows": "-object-cache-windows", +} LOG_SECONDS_TO_QUERY = 10 SECONDS_QUERY_LOGS_EVERY = 5 @@ -55,6 +59,7 @@ def start_build( "namespace": PLATFORM_TO_NAMESPACE[platform], }, "spec": { + "serviceAccountName": "buildbot-gcs-ksa", "tolerations": [ { "key": taint_key, @@ -84,7 +89,7 @@ def start_build( kubernetes.utils.create_from_dict(k8s_client, pod_definition) -def start_build_linux(commit_sha: str, k8s_client) -> str: +def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: """Starts a pod to build/test on Linux at the specified SHA.""" pod_name = f"build-{commit_sha}" commands = [ @@ -94,6 +99,10 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: "export CC=clang", "export CXX=clang++", "export POSTCOMMIT_CI=1", + f"export SCCACHE_GCS_BUCKET={bucket_name}", + "export SCCACHE_GCS_RW_MODE=READ_WRITE", + "export SCCACHE_IDLE_TIMEOUT=0", + "sccache --start-server", './.ci/monolithic-linux.sh "polly" "check-polly" "" "" "" OFF', #'./.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx 
check-cxxabi check-unwind" "OFF"' "echo BUILD FINISHED", @@ -104,7 +113,7 @@ def start_build_linux(commit_sha: str, k8s_client) -> str: return pod_name -def start_build_windows(commit_sha: str, k8s_client): +def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): """Starts a pod to build/test on Windows at the specified SHA.""" pod_name = f"build-{commit_sha}" bash_commands = [ @@ -112,6 +121,10 @@ def start_build_windows(commit_sha: str, k8s_client): "cd llvm-project", f"git checkout {commit_sha}", "export POSTCOMMIT_CI=1", + f"export SCCACHE_GCS_BUCKET={bucket_name}", + "export SCCACHE_GCS_RW_MODE=READ_WRITE", + "export SCCACHE_IDLE_TIMEOUT=0", + "sccache --start-server", ".ci/monolithic-windows.sh 'polly;mlir' 'check-polly check-mlir'", #'.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-cir check-clang-tools check-lld check-llvm check-mlir check-polly"', "echo BUILD FINISHED", @@ -217,10 +230,13 @@ def print_logs( def main(commit_sha: str, platform: str): kubernetes.config.load_incluster_config() k8s_client = kubernetes.client.ApiClient() + bucket_name = ( + os.environ["BUILDBOT_REGION"] + PLATFORM_TO_GCS_BUCKET_SUFFIX[platform] + ) if platform == "Linux": - pod_name = start_build_linux(commit_sha, k8s_client) + pod_name = start_build_linux(commit_sha, bucket_name, k8s_client) elif platform == "Windows": - pod_name = start_build_windows(commit_sha, k8s_client) + pod_name = start_build_windows(commit_sha, bucket_name, k8s_client) else: raise ValueError("Unrecognized platform.") namespace = PLATFORM_TO_NAMESPACE[platform] From b9349dce94b1804e9d7567def3253196972c5837 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 21:55:08 +0000 Subject: [PATCH 069/135] [CI] Echo commands and exit immediately if command fails --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 1e71ee2ba..170b0e67c 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -93,6 +93,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: """Starts a pod to build/test on Linux at the specified SHA.""" pod_name = f"build-{commit_sha}" commands = [ + "set -ex", "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout {commit_sha}", @@ -117,6 +118,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): """Starts a pod to build/test on Windows at the specified SHA.""" pod_name = f"build-{commit_sha}" bash_commands = [ + "set -ex", "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout {commit_sha}", From 811ca19da40ba6a50d9c47a8ce8284072e5097a1 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 22:10:35 +0000 Subject: [PATCH 070/135] [CI] Exit if the pod failed If we do not hit "BUILD FINISHED" and the pod failed, we should also exit. 
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 170b0e67c..dd716434f 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -257,6 +257,9 @@ def main(commit_sha: str, platform: str): ) if pod_finished: break + pod_status = get_pod_status(pod_name, namespace, v1_api) + if pod_status == "Failed": + break except kubernetes.client.exceptions.ApiException as log_exception: if "ContainerCreating" in json.loads(log_exception.body)["message"]: logging.warning( From d867742f6d13d907c3de1c035028f0f26fb05bc9 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 22:24:10 +0000 Subject: [PATCH 071/135] Revert "[CI] Terraform Plumbing for GCS Caching in Buildbot" This reverts commit 80d58730dcbfca8bd35ec1af920d261d9b7f362e. 
--- premerge/buildbot_deployment.yaml | 2 - premerge/gke_cluster/main.tf | 65 ----------------------------- premerge/premerge_resources/main.tf | 22 +--------- 3 files changed, 2 insertions(+), 87 deletions(-) diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml index 403f22e01..ffb914c02 100644 --- a/premerge/buildbot_deployment.yaml +++ b/premerge/buildbot_deployment.yaml @@ -26,8 +26,6 @@ spec: secretKeyRef: name: ${ secret_name } key: password - - name: BUILDBOT_REGION - value: ${ buildbot_region } resources: requests: memory: "512Mi" diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index 8a19d273e..ff86e30ee 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -283,9 +283,6 @@ resource "google_service_account" "object_cache_windows_gsa" { display_name = format("%s Windows Object Cache Service Account", var.region) } -# TODO(boomanaiden154): Restrict the permissions of the two IAM bindings -# below so that the PR runs can only read from the cache to help prevent -# cache poisoning. 
resource "google_storage_bucket_iam_binding" "linux_bucket_binding" { bucket = google_storage_bucket.object_cache_linux.name role = "roles/storage.objectUser" @@ -337,65 +334,3 @@ resource "google_service_account_iam_binding" "windows_bucket_gsa_workload_bindi google_service_account.object_cache_windows_gsa, ] } - -resource "google_service_account" "object_cache_linux_buildbot_gsa" { - account_id = format("%s-linux-buildbot", var.gcs_bucket_location) - display_name = format("%s Linux Object Cache Buildbot Service Account", var.region) -} - -resource "google_service_account" "object_cache_windows_buildbot_gsa" { - account_id = format("%s-windows-buildbot", var.gcs_bucket_location) - display_name = format("%s Windows Object Cache Buildbot Service Account", var.region) -} - -resource "google_storage_bucket_iam_binding" "linux_bucket_buildbot_binding" { - bucket = google_storage_bucket.object_cache_linux.name - role = "roles/storage.objectUser" - members = [ - format("serviceAccount:%s", google_service_account.object_cache_linux_buildbot_gsa.email), - ] - - depends_on = [ - google_storage_bucket.object_cache_linux, - google_service_account.object_cache_linux_buildbot_gsa, - ] -} - -resource "google_storage_bucket_iam_binding" "windows_bucket_buildbot_binding" { - bucket = google_storage_bucket.object_cache_windows.name - role = "roles/storage.objectUser" - members = [ - format("serviceAccount:%s", google_service_account.object_cache_windows_buildbot_gsa.email), - ] - - depends_on = [ - google_storage_bucket.object_cache_windows, - google_service_account.object_cache_windows_buildbot_gsa - ] -} - -resource "google_service_account_iam_binding" "linux_bucket_buildbot_gsa_workload_binding" { - service_account_id = google_service_account.object_cache_linux_buildbot_gsa.name - role = "roles/iam.workloadIdentityUser" - - members = [ - "serviceAccount:${google_service_account.object_cache_linux_buildbot_gsa.project}.svc.id.goog[llvm-premerge-linux-buildbot/buildbot-gcs-ksa]", 
- ] - - depends_on = [ - google_service_account.object_cache_linux_buildbot_gsa, - ] -} - -resource "google_service_account_iam_binding" "windows_bucket_buildbot_gsa_workload_binding" { - service_account_id = google_service_account.object_cache_windows_buildbot_gsa.name - role = "roles/iam.workloadIdentityUser" - - members = [ - "serviceAccount:${google_service_account.object_cache_windows_buildbot_gsa.project}.svc.id.goog[llvm-premerge-windows-2022-buildbot/buildbot-gcs-ksa]", - ] - - depends_on = [ - google_service_account.object_cache_windows_buildbot_gsa, - ] -} diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 222f550d9..db48e2d34 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -329,17 +329,8 @@ resource "kubernetes_role_binding" "linux_buildbot_role_binding" { depends_on = [kubernetes_role.linux_buildbot_role, kubernetes_service_account.linux_buildbot_ksa] } -resource "kubernetes_service_account" "linux_buildbot_gcs_ksa" { - metadata { - name = "buildbot-gcs-ksa" - namespace = "llvm-premerge-linux-buildbot" - } - - depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] -} - resource "kubernetes_manifest" "linux_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password" })) depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] } @@ -389,17 +380,8 @@ resource "kubernetes_role_binding" "windows_2022_buildbot_role_binding" { depends_on = [kubernetes_role.windows_2022_buildbot_role, 
kubernetes_service_account.windows_2022_buildbot_ksa] } -resource "kubernetes_service_account" "windows_2022_buildbot_gcs_ksa" { - metadata { - name = "buildbot-gcs-ksa" - namespace = "llvm-premerge-windows-2022-buildbot" - } - - depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] -} - resource "kubernetes_manifest" "windows_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : var.cluster_name })) + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] } From 340f2a3087c2b7dcaff5d7593e180d4eb70357f4 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 22:39:16 +0000 Subject: [PATCH 072/135] Reapply "[CI] Terraform Plumbing for GCS Caching in Buildbot" This reverts commit fbee6cd496267a056afcf2c62361f6644d95348e. There were a couple minor issues preventing this from working: - duplicated GCS IAM service account bindings which overrode each other. 
- Missing GKE annotations on the service accounts in k8s --- premerge/buildbot_deployment.yaml | 2 + premerge/gke_cluster/main.tf | 38 ++++++++++++ premerge/gke_cluster/outputs.tf | 8 +++ premerge/main.tf | 76 +++++++++++++----------- premerge/premerge_resources/main.tf | 28 ++++++++- premerge/premerge_resources/variables.tf | 10 ++++ 6 files changed, 124 insertions(+), 38 deletions(-) diff --git a/premerge/buildbot_deployment.yaml b/premerge/buildbot_deployment.yaml index ffb914c02..403f22e01 100644 --- a/premerge/buildbot_deployment.yaml +++ b/premerge/buildbot_deployment.yaml @@ -26,6 +26,8 @@ spec: secretKeyRef: name: ${ secret_name } key: password + - name: BUILDBOT_REGION + value: ${ buildbot_region } resources: requests: memory: "512Mi" diff --git a/premerge/gke_cluster/main.tf b/premerge/gke_cluster/main.tf index ff86e30ee..cc01357ea 100644 --- a/premerge/gke_cluster/main.tf +++ b/premerge/gke_cluster/main.tf @@ -283,11 +283,22 @@ resource "google_service_account" "object_cache_windows_gsa" { display_name = format("%s Windows Object Cache Service Account", var.region) } +resource "google_service_account" "object_cache_linux_buildbot_gsa" { + account_id = format("%s-linux-buildbot", var.gcs_bucket_location) + display_name = format("%s Linux Object Cache Buildbot Service Account", var.region) +} + +resource "google_service_account" "object_cache_windows_buildbot_gsa" { + account_id = format("%s-windows-buildbot", var.gcs_bucket_location) + display_name = format("%s Windows Object Cache Buildbot Service Account", var.region) +} + resource "google_storage_bucket_iam_binding" "linux_bucket_binding" { bucket = google_storage_bucket.object_cache_linux.name role = "roles/storage.objectUser" members = [ format("serviceAccount:%s", google_service_account.object_cache_linux_gsa.email), + format("serviceAccount:%s", google_service_account.object_cache_linux_buildbot_gsa.email), ] depends_on = [ @@ -301,6 +312,7 @@ resource "google_storage_bucket_iam_binding" 
"windows_bucket_binding" { role = "roles/storage.objectUser" members = [ format("serviceAccount:%s", google_service_account.object_cache_windows_gsa.email), + format("serviceAccount:%s", google_service_account.object_cache_windows_buildbot_gsa.email), ] depends_on = [ @@ -334,3 +346,29 @@ resource "google_service_account_iam_binding" "windows_bucket_gsa_workload_bindi google_service_account.object_cache_windows_gsa, ] } + +resource "google_service_account_iam_binding" "linux_bucket_buildbot_gsa_workload_binding" { + service_account_id = google_service_account.object_cache_linux_buildbot_gsa.name + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${google_service_account.object_cache_linux_buildbot_gsa.project}.svc.id.goog[llvm-premerge-linux-buildbot/buildbot-gcs-ksa]", + ] + + depends_on = [ + google_service_account.object_cache_linux_buildbot_gsa, + ] +} + +resource "google_service_account_iam_binding" "windows_bucket_buildbot_gsa_workload_binding" { + service_account_id = google_service_account.object_cache_windows_buildbot_gsa.name + role = "roles/iam.workloadIdentityUser" + + members = [ + "serviceAccount:${google_service_account.object_cache_windows_buildbot_gsa.project}.svc.id.goog[llvm-premerge-windows-2022-buildbot/buildbot-gcs-ksa]", + ] + + depends_on = [ + google_service_account.object_cache_windows_buildbot_gsa, + ] +} diff --git a/premerge/gke_cluster/outputs.tf b/premerge/gke_cluster/outputs.tf index 38b9c191d..76124e9a0 100644 --- a/premerge/gke_cluster/outputs.tf +++ b/premerge/gke_cluster/outputs.tf @@ -21,3 +21,11 @@ output "linux_object_cache_gcp_service_account_email" { output "windows_2022_object_cache_gcp_service_account_email" { value = google_service_account.object_cache_windows_gsa.email } + +output "linux_object_cache_buildbot_service_account_email" { + value = google_service_account.object_cache_linux_buildbot_gsa.email +} + +output "windows_2022_object_cache_buildbot_service_account_email" { + value = 
google_service_account.object_cache_windows_buildbot_gsa.email +} diff --git a/premerge/main.tf b/premerge/main.tf index cde4fa7bb..4c9ea48a7 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -157,24 +157,26 @@ provider "kubernetes" { } module "premerge_cluster_us_central_resources" { - source = "./premerge_resources" - github_app_id = data.google_secret_manager_secret_version.github_app_id.secret_data - github_app_installation_id = data.google_secret_manager_secret_version.github_app_installation_id.secret_data - github_app_private_key = data.google_secret_manager_secret_version.github_app_private_key.secret_data - cluster_name = "llvm-premerge-cluster-us-central" - grafana_token = data.google_secret_manager_secret_version.grafana_token.secret_data - runner_group_name = "llvm-premerge-cluster-us-central" - linux_runners_namespace_name = local.linux_runners_namespace_name - linux_runners_kubernetes_service_account_name = local.linux_runners_kubernetes_service_account_name - windows_2022_runners_namespace_name = local.windows_2022_runners_namespace_name - windows_2022_runners_kubernetes_service_account_name = local.windows_2022_runners_kubernetes_service_account_name - linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.linux_object_cache_gcp_service_account_email - windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_gcp_service_account_email - github_arc_version = "0.12.1" - linux_buildbot_name = "premerge-us-central-linux" - linux_buildbot_password = data.google_secret_manager_secret_version.us_central_linux_buildbot_password.secret_data - windows_buildbot_name = "premerge-us-central-windows" - windows_buildbot_password = data.google_secret_manager_secret_version.us_central_windows_buildbot_password.secret_data + source = "./premerge_resources" + github_app_id = data.google_secret_manager_secret_version.github_app_id.secret_data + github_app_installation_id = 
data.google_secret_manager_secret_version.github_app_installation_id.secret_data + github_app_private_key = data.google_secret_manager_secret_version.github_app_private_key.secret_data + cluster_name = "llvm-premerge-cluster-us-central" + grafana_token = data.google_secret_manager_secret_version.grafana_token.secret_data + runner_group_name = "llvm-premerge-cluster-us-central" + linux_runners_namespace_name = local.linux_runners_namespace_name + linux_runners_kubernetes_service_account_name = local.linux_runners_kubernetes_service_account_name + windows_2022_runners_namespace_name = local.windows_2022_runners_namespace_name + windows_2022_runners_kubernetes_service_account_name = local.windows_2022_runners_kubernetes_service_account_name + linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.linux_object_cache_gcp_service_account_email + windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_gcp_service_account_email + github_arc_version = "0.12.1" + linux_buildbot_name = "premerge-us-central-linux" + linux_buildbot_password = data.google_secret_manager_secret_version.us_central_linux_buildbot_password.secret_data + windows_buildbot_name = "premerge-us-central-windows" + windows_buildbot_password = data.google_secret_manager_secret_version.us_central_windows_buildbot_password.secret_data + linux_object_cache_buildbot_service_account_email = module.premerge_cluster_us_central.linux_object_cache_buildbot_service_account_email + windows_2022_object_cache_buildbot_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_buildbot_service_account_email providers = { kubernetes = kubernetes.llvm-premerge-us-central helm = helm.llvm-premerge-us-central @@ -182,24 +184,26 @@ module "premerge_cluster_us_central_resources" { } module "premerge_cluster_us_west_resources" { - source = "./premerge_resources" - github_app_id = 
data.google_secret_manager_secret_version.github_app_id.secret_data - github_app_installation_id = data.google_secret_manager_secret_version.github_app_installation_id.secret_data - github_app_private_key = data.google_secret_manager_secret_version.github_app_private_key.secret_data - cluster_name = "llvm-premerge-cluster-us-west" - grafana_token = data.google_secret_manager_secret_version.grafana_token.secret_data - runner_group_name = "llvm-premerge-cluster-us-west" - linux_runners_namespace_name = local.linux_runners_namespace_name - linux_runners_kubernetes_service_account_name = local.linux_runners_kubernetes_service_account_name - windows_2022_runners_namespace_name = local.windows_2022_runners_namespace_name - windows_2022_runners_kubernetes_service_account_name = local.windows_2022_runners_kubernetes_service_account_name - linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.linux_object_cache_gcp_service_account_email - windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_gcp_service_account_email - github_arc_version = "0.12.1" - linux_buildbot_name = "premerge-us-west-linux" - linux_buildbot_password = data.google_secret_manager_secret_version.us_west_linux_buildbot_password.secret_data - windows_buildbot_name = "premerge-us-west-windows" - windows_buildbot_password = data.google_secret_manager_secret_version.us_west_windows_buildbot_password.secret_data + source = "./premerge_resources" + github_app_id = data.google_secret_manager_secret_version.github_app_id.secret_data + github_app_installation_id = data.google_secret_manager_secret_version.github_app_installation_id.secret_data + github_app_private_key = data.google_secret_manager_secret_version.github_app_private_key.secret_data + cluster_name = "llvm-premerge-cluster-us-west" + grafana_token = data.google_secret_manager_secret_version.grafana_token.secret_data + runner_group_name = "llvm-premerge-cluster-us-west" + 
linux_runners_namespace_name = local.linux_runners_namespace_name + linux_runners_kubernetes_service_account_name = local.linux_runners_kubernetes_service_account_name + windows_2022_runners_namespace_name = local.windows_2022_runners_namespace_name + windows_2022_runners_kubernetes_service_account_name = local.windows_2022_runners_kubernetes_service_account_name + linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.linux_object_cache_gcp_service_account_email + windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_gcp_service_account_email + github_arc_version = "0.12.1" + linux_buildbot_name = "premerge-us-west-linux" + linux_buildbot_password = data.google_secret_manager_secret_version.us_west_linux_buildbot_password.secret_data + windows_buildbot_name = "premerge-us-west-windows" + windows_buildbot_password = data.google_secret_manager_secret_version.us_west_windows_buildbot_password.secret_data + linux_object_cache_buildbot_service_account_email = module.premerge_cluster_us_west.linux_object_cache_buildbot_service_account_email + windows_2022_object_cache_buildbot_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_buildbot_service_account_email providers = { kubernetes = kubernetes.llvm-premerge-us-west helm = helm.llvm-premerge-us-west diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index db48e2d34..6fd0629bf 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -329,8 +329,20 @@ resource "kubernetes_role_binding" "linux_buildbot_role_binding" { depends_on = [kubernetes_role.linux_buildbot_role, kubernetes_service_account.linux_buildbot_ksa] } +resource "kubernetes_service_account" "linux_buildbot_gcs_ksa" { + metadata { + name = "buildbot-gcs-ksa" + namespace = "llvm-premerge-linux-buildbot" + annotations = { + "iam.gke.io/gcp-service-account" = 
var.linux_object_cache_buildbot_service_account_email + } + } + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] +} + resource "kubernetes_manifest" "linux_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password" })) + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] } @@ -380,8 +392,20 @@ resource "kubernetes_role_binding" "windows_2022_buildbot_role_binding" { depends_on = [kubernetes_role.windows_2022_buildbot_role, kubernetes_service_account.windows_2022_buildbot_ksa] } +resource "kubernetes_service_account" "windows_2022_buildbot_gcs_ksa" { + metadata { + name = "buildbot-gcs-ksa" + namespace = "llvm-premerge-windows-2022-buildbot" + annotations = { + "iam.gke.io/gcp-service-account" = var.windows_2022_object_cache_buildbot_service_account_email + } + } + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] +} + resource "kubernetes_manifest" "windows_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password" })) + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : var.cluster_name })) depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] } diff --git 
a/premerge/premerge_resources/variables.tf b/premerge/premerge_resources/variables.tf index 56baf207f..0528b26ea 100644 --- a/premerge/premerge_resources/variables.tf +++ b/premerge/premerge_resources/variables.tf @@ -123,3 +123,13 @@ variable "windows_buildbot_password" { description = "The password for the windows buildbot that will run tests postcommit." type = string } + +variable "linux_object_cache_buildbot_service_account_email" { + description = "The email associated with the service account for the buildbot worker accessing the object cache on Linux." + type = string +} + +variable "windows_2022_object_cache_buildbot_service_account_email" { + description = "The email associated with the service account for the buildbot worker accessing the object cache on Windows." + type = string +} From 7c71eef911de43827177c1bee9ae1bd6dfcfbba6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 18 Aug 2025 10:01:30 -0700 Subject: [PATCH 073/135] [PGO] Add Profcheck Buildbot (#551) This patch adds the infrastructure for a buildbot for the profcheck configuration (LLVM_ENABLE_PROFCHECK=ON). This patch does not yet add machinery for excluding currently failing tests/other tests that we want to ignore. Issue #147390 --- buildbot/osuosl/master/config/builders.py | 18 ++++++++++++++++++ buildbot/osuosl/master/config/workers.py | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 093bc95ac..2dca8777e 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3558,6 +3558,24 @@ checkout_llvm_sources=False, extra_args=["Windows"], depends_on_projects=["clang-tools-extra", "clang", "libclc", "lld", "llvm", "mlir", "polly"])}, + + # Builders for the profcheck configuration + # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure + # that profile information is propagated correctly. 
+ {'name' : "profcheck", + 'workernames' : ["profcheck-b1", "profcheck-b2"], + 'builddir': "profcheck-build", + 'factory' : UnifiedTreeBuilder.getCmakeWithNinjaBuildFactory( + clean=True, + depends_on_projects=['llvm'], + extra_configure_args=[ + "-DCMAKE_BUILD_TYPE=Release", + "-DCMAKE_C_COMPILER_LAUNCHER=ccache", + "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + "-DLLVM_ENABLE_ASSERTIONS=ON", + "-DLLVM_LIT_ARGS='--exclude-xfail'", + "-DLLVM_ENABLE_PROFCHECK=ON", + ])}, ] # LLDB remote-linux builder env variables. diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 945a07f8f..5a2dc3352 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -413,6 +413,12 @@ def get_all(): create_worker("premerge-us-west-linux", properties={'jobs': 64}, max_builds=3), create_worker("premerge-us-west-windows", properties={'jobs': 64}, max_builds=3), + # Workers for the profcheck configuration + # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure + # that profile information is propagated correctly. + create_worker("profcheck-b1", properties={'jobs': 64}, max_builds=1), + create_worker("profcheck-b2", properties={'jobs': 64}, max_builds=1), + # FIXME: A placeholder for annoying worker which nobody could stop. # adding it avoid logs spammed by failed authentication for that worker. create_worker("mlir-ubuntu-worker0"), From 28a1df03be5de983f1d8d59f83029a70749b6005 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Tue, 19 Aug 2025 13:58:16 +0100 Subject: [PATCH 074/135] Add DebugifyBuilder for debug location coverage testing (#493) This patch adds a new build factory for running tests using Debugify (see "How To Update Debug Info" in the LLVM docs[0]) for the purposes of detecting debug info errors as proposed on Discourse[1]. 
This builder is very similar to the `TestSuiteBuilder`, but it adds some required CMake flags to the LLVM and Test Suite builds, and adds an extra step where we use a script within `llvm/utils` to evaluate the output of Debugify. As part of implementing this, I had to make some small changes to the `TestSuiteBuilder` to allow CMake flags to be passed for the Test Suite build, as currently it only accepts flags for the LLVM build. These changes should be a no-op for all existing builds; only by passing the new `extra_test_suite_configure_args` parameter should this have any effect. [0] https://llvm.org/docs/HowToUpdateDebugInfo.html#test-original-debug-info-preservation-in-optimizations [1] https://discourse.llvm.org/t/rfc-require-real-or-annotated-source-locations-on-all-instructions/86816 --- zorg/buildbot/builders/DebugifyBuilder.py | 96 ++++++++++++++++++++++ zorg/buildbot/builders/TestSuiteBuilder.py | 17 ++-- 2 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 zorg/buildbot/builders/DebugifyBuilder.py diff --git a/zorg/buildbot/builders/DebugifyBuilder.py b/zorg/buildbot/builders/DebugifyBuilder.py new file mode 100644 index 000000000..439752256 --- /dev/null +++ b/zorg/buildbot/builders/DebugifyBuilder.py @@ -0,0 +1,96 @@ +from buildbot.plugins import util +from buildbot.steps.shell import ShellCommand +from zorg.buildbot.builders import TestSuiteBuilder +from zorg.buildbot.commands.CmakeCommand import CmakeCommand + + +def addCheckDebugifyStep(f, debugify_output_path, compiler_dir=".", env={}): + script = util.Interpolate( + f"%(prop:builddir)s/{compiler_dir}/llvm/utils/llvm-original-di-preservation.py" + ) + f.addStep( + ShellCommand( + name="check debugify output", + command=[ + "python3", + script, + util.Interpolate(debugify_output_path), + "--acceptance-test", + "--reduce", + ], + description="check debugify output", + env=env, + ) + ) + + +def getDebugifyBuildFactory( + depends_on_projects=None, + enable_runtimes="auto", + 
targets=None, + llvm_srcdir=None, + obj_dir=None, + checks=None, + install_dir=None, + clean=False, + test_suite_build_flags="-O2 -g -DNDEBUG", + extra_configure_args=None, + enable_origin_tracking=True, + extra_test_suite_configure_args=None, + env={}, + **kwargs, +): + + # Make a local copy of the LLVM configure args, as we are going to modify that. + if extra_configure_args is not None: + llvm_cmake_args = extra_configure_args[:] + else: + llvm_cmake_args = list() + + tracking_mode = "COVERAGE_AND_ORIGIN" if enable_origin_tracking else "COVERAGE" + CmakeCommand.applyRequiredOptions(llvm_cmake_args, [ + ('-DLLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING=', tracking_mode) + ]) + + # This path will be passed through to util.Interpolate, so we leave it in this format. + debugify_output_path = f"%(prop:builddir)s/debugify-report.json" + + # Make a local copy of the test suite configure args, as we are going to modify that. + if extra_test_suite_configure_args is not None: + test_suite_cmake_args = extra_test_suite_configure_args[:] + else: + test_suite_cmake_args = list() + + CmakeCommand.applyDefaultOptions(test_suite_cmake_args, [ + ('-DTEST_SUITE_SUBDIRS=', 'CTMark'), + ('-DTEST_SUITE_RUN_BENCHMARKS=', 'false'), + ('-DTEST_SUITE_COLLECT_CODE_SIZE=', 'false'), + ]) + # The only configuration that currently makes sense for Debugify builds is optimized debug info builds; any build + # configuration adjustments can be made through the test_suite_build_flags arg. 
+ build_flags = f'{test_suite_build_flags} -Xclang -fverify-debuginfo-preserve -Xclang -fverify-debuginfo-preserve-export={debugify_output_path} -mllvm --debugify-quiet -mllvm -debugify-level=locations' + CmakeCommand.applyRequiredOptions(test_suite_cmake_args, [ + ('-DCMAKE_BUILD_TYPE=', 'RelWithDebInfo'), + ]) + test_suite_cmake_args += [ + util.Interpolate(f"-DCMAKE_C_FLAGS_RELWITHDEBINFO={build_flags}"), + util.Interpolate(f"-DCMAKE_CXX_FLAGS_RELWITHDEBINFO={build_flags}"), + ] + + f = TestSuiteBuilder.getTestSuiteBuildFactory( + depends_on_projects=depends_on_projects, + enable_runtimes=enable_runtimes, + targets=targets, + llvm_srcdir=llvm_srcdir, + obj_dir=obj_dir, + checks=checks, + install_dir=install_dir, + clean=clean, + extra_configure_args=llvm_cmake_args, + extra_test_suite_configure_args=test_suite_cmake_args, + **kwargs + ) + + addCheckDebugifyStep(f, debugify_output_path, compiler_dir=f.monorepo_dir, env=env) + + return f diff --git a/zorg/buildbot/builders/TestSuiteBuilder.py b/zorg/buildbot/builders/TestSuiteBuilder.py index fb9c3bb49..719315b48 100644 --- a/zorg/buildbot/builders/TestSuiteBuilder.py +++ b/zorg/buildbot/builders/TestSuiteBuilder.py @@ -16,6 +16,7 @@ def addTestSuiteStep( compiler_dir = '.', env = None, lit_args = None, + extra_configure_args = None, **kwargs): # Set defaults @@ -24,15 +25,19 @@ def addTestSuiteStep( if lit_args is None: lit_args = [] - cc = util.Interpolate('-DCMAKE_C_COMPILER=' + '%(prop:builddir)s/'+compiler_dir+'/bin/clang') - cxx = util.Interpolate('-DCMAKE_CXX_COMPILER=' + '%(prop:builddir)s/'+compiler_dir+'/bin/clang++') + cc = util.Interpolate('-DCMAKE_C_COMPILER=%(prop:builddir)s/'+compiler_dir+'/bin/clang') + cxx = util.Interpolate('-DCMAKE_CXX_COMPILER=%(prop:builddir)s/'+compiler_dir+'/bin/clang++') lit = util.Interpolate('%(prop:builddir)s/' + compiler_dir + '/bin/llvm-lit') test_suite_base_dir = util.Interpolate('%(prop:builddir)s/' + 'test') test_suite_src_dir = util.Interpolate('%(prop:builddir)s/' 
+ 'test/test-suite') test_suite_workdir = util.Interpolate('%(prop:builddir)s/' + 'test/build-test-suite') - cmake_lit_arg = util.Interpolate('-DTEST_SUITE_LIT:FILEPATH=' + '%(prop:builddir)s/' + compiler_dir + '/bin/llvm-lit') + cmake_lit_arg = util.Interpolate('-DTEST_SUITE_LIT:FILEPATH=%(prop:builddir)s/' + compiler_dir + '/bin/llvm-lit') # used for cmake building test-suite step - options = [cc, cxx, cmake_lit_arg] + if extra_configure_args is not None: + cmake_args = extra_configure_args[:] + else: + cmake_args = list() + cmake_args.extend([cc, cxx, cmake_lit_arg]) # always clobber the build directory to test each new compiler f.addStep(ShellCommand(name='Clean Test Suite Build dir', @@ -51,7 +56,7 @@ def addTestSuiteStep( haltOnFailure=True, description='Running cmake on Test Suite dir', workdir=test_suite_workdir, - options=options, + options=cmake_args, path=test_suite_src_dir, generator='Ninja')) @@ -80,6 +85,7 @@ def getTestSuiteBuildFactory( install_dir = None, clean = False, extra_configure_args = None, + extra_test_suite_configure_args = None, env = None, **kwargs): @@ -109,6 +115,7 @@ def getTestSuiteBuildFactory( compiler_dir=f.obj_dir, env=env, lit_args=lit_args, + extra_configure_args=extra_test_suite_configure_args, **kwargs) return f From 1d8e3061d5c7264ce0bfac1585c271b289672321 Mon Sep 17 00:00:00 2001 From: dyung Date: Tue, 19 Aug 2025 12:21:13 -0400 Subject: [PATCH 075/135] Remove Key Instruction builder and add a new Debugify builder (#555) --- buildbot/osuosl/master/config/builders.py | 31 +++++++++++++---------- buildbot/osuosl/master/config/status.py | 15 ++++++++--- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 2dca8777e..974a4dce4 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -1039,24 +1039,27 @@ "-DLLVM_USE_LINKER=gold", "-DLLVM_ENABLE_WERROR=OFF"])}, - {'name': 
"llvm-clang-key-instructions", - 'tags' : ["llvm", "clang", "compiler-rt", "lld", "cross-project-tests"], - 'workernames': ["sie-linux-worker5"], - 'builddir': "llvm-ki", - 'factory': UnifiedTreeBuilder.getCmakeWithNinjaBuildFactory( - depends_on_projects=['llvm','clang','compiler-rt','lld','cross-project-tests'], + {'name': "llvm-x86_64-debugify-coverage", + 'tags': ["llvm", "clang", "lld"], + 'workernames': ["sie-linux-worker5"], + 'builddir': "llvm-dbg", + 'factory': DebugifyBuilder.getDebugifyBuildFactory( + clean=True, + depends_on_projects=['llvm','clang','lld'], extra_configure_args=[ - "-DCMAKE_C_COMPILER=gcc", - "-DCMAKE_CXX_COMPILER=g++", - "-DCMAKE_BUILD_TYPE=Release", - "-DCLANG_ENABLE_CLANGD=OFF", - "-DLLVM_BUILD_RUNTIME=ON", + "-DCMAKE_C_COMPILER=clang", + "-DCMAKE_CXX_COMPILER=clang++", + "-DCMAKE_BUILD_TYPE=RelWithDebInfo", + "-DCMAKE_C_FLAGS_RELWITHDEBINFO=-O2 -gmlt -DNDEBUG", + "-DCMAKE_CXX_FLAGS_RELWITHDEBINFO=-O2 -gmlt -DNDEBUG", + "-DLLVM_CCACHE_BUILD=ON", "-DLLVM_BUILD_TESTS=ON", "-DLLVM_ENABLE_ASSERTIONS=ON", - "-DLLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=ON", "-DLLVM_INCLUDE_EXAMPLES=OFF", - "-DLLVM_LIT_ARGS=--verbose --timeout=900", - "-DLLVM_USE_LINKER=gold"])}, + "-DLLVM_TARGETS_TO_BUILD=X86", + "-DLLVM_LIT_ARGS=-v", + "-DLLVM_USE_LINKER=lld", + "-DLLVM_ENABLE_WERROR=OFF"])}, {'name': "llvm-clang-x86_64-darwin", 'tags' : ["llvm", "clang", "clang-tools-extra", "lld", "cross-project-tests"], diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index 30a46a037..3d3e64b35 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -360,7 +360,7 @@ def getReporters(): "llvm-clang-x86_64-darwin", "llvm-clang-aarch64-darwin", "llvm-clang-aarch64-darwin-release", - "llvm-clang-key-instructions"]) + "llvm-x86_64-debugify-coverage"]) ]), reporters.MailNotifier( fromaddr = status_email_fromaddr, @@ -485,8 +485,7 @@ def getReporters(): utils.LLVMDefaultBuildStatusGenerator( 
builders = [ "cross-project-tests-sie-ubuntu", - "llvm-clang-x86_64-sie-win", - "llvm-clang-key-instructions"]) + "llvm-clang-x86_64-sie-win"]) ]), reporters.MailNotifier( fromaddr = status_email_fromaddr, @@ -560,6 +559,16 @@ def getReporters(): "flang-runtime-cuda-gcc", "flang-runtime-cuda-clang"]) ]), + reporters.MailNotifier( + fromaddr = status_email_fromaddr, + sendToInterestedUsers = False, + extraRecipients = ["stephen.tozer@sony.com"], + generators = [ + utils.LLVMDefaultBuildStatusGenerator( + builders = [ + "llvm-x86_64-debugify-coverage"]) + ]), + ]) return r From 01cd8633c32776dd3859d0aeb014b285be980374 Mon Sep 17 00:00:00 2001 From: dyung Date: Tue, 19 Aug 2025 15:49:33 -0400 Subject: [PATCH 076/135] Add import of the DebugifyBuilder that I forgot in #555. (#558) --- buildbot/osuosl/master/config/builders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 974a4dce4..559ff864c 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -18,6 +18,7 @@ from zorg.buildbot.builders import XToolchainBuilder from zorg.buildbot.builders import TestSuiteBuilder from zorg.buildbot.builders import BOLTBuilder +from zorg.buildbot.builders import DebugifyBuilder from zorg.buildbot.builders import HtmlDocsBuilder from zorg.buildbot.builders import DoxygenDocsBuilder From db9bc5d52dfcf3f0e3ab5b57df74378334ddac70 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 19 Aug 2025 20:06:47 +0000 Subject: [PATCH 077/135] [CI] Run full pipeline on premerge builders This patch runs all the checks on the premerge builders rather than a small subset intended for testing purposes. 
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index dd716434f..2e3dc2dbc 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -104,8 +104,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - './.ci/monolithic-linux.sh "polly" "check-polly" "" "" "" OFF', - #'./.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' + './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' "echo BUILD FINISHED", ] start_build( @@ -127,8 +126,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - ".ci/monolithic-windows.sh 'polly;mlir' 'check-polly check-mlir'", - #'.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-cir check-clang-tools check-lld check-llvm check-mlir check-polly"', + '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-tools check-lld check-llvm check-mlir check-polly"', "echo 
BUILD FINISHED", ] bash_command = f"bash -c \"{';'.join(bash_commands)}\"\"" From a1805019b96cc67966979a2a88bca70e9e4a99a1 Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Tue, 19 Aug 2025 16:23:31 -0400 Subject: [PATCH 078/135] [CI] Stop uploading metrics via Grafana API endpoint (#557) This change makes it so we stop uploading commit data to our Grafana Prometheus data source. Using the BigQuery plugin for Grafana instead allows us to query our BigQuery dataset directly and visualize all of our historical data in our dashboard. This means that BigQuery is now our primary data source and we are no longer using our Prometheus data source for the operational maturity dashboard. Since we're not using Prometheus as a data source anymore, we can stop uploading data to it. --- .../ops-container/process_llvm_commits.py | 98 +------------------ premerge/main.tf | 4 +- premerge/operational_metrics_cronjob.yaml | 10 -- 3 files changed, 2 insertions(+), 110 deletions(-) diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/llvm-ops-metrics/ops-container/process_llvm_commits.py index 6cc3c39d4..29bc1067e 100644 --- a/llvm-ops-metrics/ops-container/process_llvm_commits.py +++ b/llvm-ops-metrics/ops-container/process_llvm_commits.py @@ -6,9 +6,6 @@ from google.cloud import bigquery import requests -GRAFANA_URL = ( - "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write" -) GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql" REPOSITORY_URL = "https://github.com/llvm/llvm-project.git" @@ -199,84 +196,6 @@ def query_for_reviews( return list(new_commits.values()) -def get_past_contributors(bq_client: bigquery.Client) -> set[str]: - """Get past contributors to LLVM from BigQuery dataset. - - Args: - bq_client: The BigQuery client to use. - - Returns: - Set of unique past contributors to LLVM.
- """ - results = bq_client.query(""" - SELECT - DISTINCT commit_author - FROM %s.%s - WHERE commit_author IS NOT NULL - """ % (OPERATIONAL_METRICS_DATASET, LLVM_COMMITS_TABLE)).result() - return set(row.commit_author for row in results) - - -def upload_daily_metrics_to_grafana( - grafana_api_key: str, - grafana_metrics_userid: str, - new_commits: list[LLVMCommitInfo], - past_contributors: set[str], -) -> None: - """Upload daily commit metrics to Grafana. - - Args: - grafana_api_key: The key to make API requests with. - grafana_metrics_userid: The user to make API requests with. - new_commits: List of commits to process & upload to Grafana. - past_contributors: Set of unique past contributors to LLVM. - """ - - def post_data(data: str) -> None: - """Helper function to post data to Grafana.""" - response = requests.post( - GRAFANA_URL, - headers={"Content-Type": "text/plain"}, - data=data, - auth=(grafana_metrics_userid, grafana_api_key), - ) - if response.status_code < 200 or response.status_code >= 300: - logging.error("Failed to submit data to Grafana: %s", response.text) - - # Count each type of commit made - approval_count = 0 - review_count = 0 - pull_request_count = 0 - push_count = 0 - contributors = set() - for commit in new_commits: - if commit.is_approved: - approval_count += 1 - elif commit.is_reviewed: - review_count += 1 - elif commit.has_pull_request: - pull_request_count += 1 - else: - push_count += 1 - contributors.add(commit.commit_author) - - # Post data via InfluxDB API call - # Commit data - request_data = ( - "llvm_project_main_daily_commits" - " approval_count={},review_count={},pull_request_count={},push_count={}" - ).format(approval_count, review_count, pull_request_count, push_count) - post_data(request_data) - - # Contributor data - request_data = ( - "llvm_project_main" - " daily_unique_contributor_count={},all_time_unique_contributor_count={}" - .format(len(contributors), len(contributors | past_contributors)) - ) - 
post_data(request_data) - - def upload_daily_metrics_to_bigquery( bq_client: bigquery.Client, new_commits: list[LLVMCommitInfo] ) -> None: @@ -296,10 +215,6 @@ def upload_daily_metrics_to_bigquery( def main() -> None: github_token = os.environ["GITHUB_TOKEN"] - grafana_api_key = os.environ["GRAFANA_API_KEY"] - grafana_metrics_userid = os.environ["GRAFANA_METRICS_USERID"] - - bq_client = bigquery.Client() # Scrape new commits date_to_scrape = datetime.datetime.now( @@ -317,20 +232,9 @@ def main() -> None: logging.info("Querying for reviews of new commits.") new_commit_info = query_for_reviews(new_commits, github_token) - logging.info("Getting set of past LLVM contributors.") - past_contributors = get_past_contributors(bq_client) - - logging.info("Uploading metrics to Grafana.") - upload_daily_metrics_to_grafana( - grafana_api_key, - grafana_metrics_userid, - new_commit_info, - past_contributors, - ) - logging.info("Uploading metrics to BigQuery.") + bq_client = bigquery.Client() upload_daily_metrics_to_bigquery(bq_client, new_commit_info) - bq_client.close() diff --git a/premerge/main.tf b/premerge/main.tf index 4c9ea48a7..2371ebfb6 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -315,9 +315,7 @@ resource "kubernetes_secret" "operational_metrics_secrets" { } data = { - "github-token" = data.google_secret_manager_secret_version.metrics_github_pat.secret_data - "grafana-api-key" = data.google_secret_manager_secret_version.metrics_grafana_api_key.secret_data - "grafana-metrics-userid" = data.google_secret_manager_secret_version.metrics_grafana_metrics_userid.secret_data + "github-token" = data.google_secret_manager_secret_version.metrics_github_pat.secret_data } type = "Opaque" diff --git a/premerge/operational_metrics_cronjob.yaml b/premerge/operational_metrics_cronjob.yaml index e2a0b965c..8058367cd 100644 --- a/premerge/operational_metrics_cronjob.yaml +++ b/premerge/operational_metrics_cronjob.yaml @@ -25,16 +25,6 @@ spec: secretKeyRef: name: 
operational-metrics-secrets key: github-token - - name: GRAFANA_API_KEY - valueFrom: - secretKeyRef: - name: operational-metrics-secrets - key: grafana-api-key - - name: GRAFANA_METRICS_USERID - valueFrom: - secretKeyRef: - name: operational-metrics-secrets - key: grafana-metrics-userid resources: requests: cpu: "250m" From 4f41d968ab64b2e61909d7f205469a68773e50b7 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 19 Aug 2025 20:36:31 +0000 Subject: [PATCH 079/135] [CI] Add missing comma Otherwise python tries string concatenation which completely screws everything up. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 2e3dc2dbc..8f7fc43b9 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -104,7 +104,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"' + './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"', "echo BUILD FINISHED", ] start_build( From 29ed31051fc10b817749f05055f1dc15c996c44f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 19 Aug 
2025 21:51:15 +0000 Subject: [PATCH 080/135] [CI] Use emptyDir mount for windows buildbot pods Otherwise the pods run out of space and we get a ton of test failures. This is the same configuration used in the GitHub-orchestrated premerge checks. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 8f7fc43b9..77cc5df60 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -86,6 +86,11 @@ def start_build( "restartPolicy": "Never", }, } + if platform == "Windows": + pod_definition["spec"]["volumes"] = [{"name": "builddir", "emptyDir": {}}] + pod_definition["spec"]["containers"][0]["volumeMounts"] = [ + {"name": "builddir", "mountPath": "C:/_work"} + ] kubernetes.utils.create_from_dict(k8s_client, pod_definition) @@ -118,6 +123,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): pod_name = f"build-{commit_sha}" bash_commands = [ "set -ex", + "cd C:/_work", "git clone --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout {commit_sha}", From b6ad08dd240eb3fb36ad088c6c5450a818af7f92 Mon Sep 17 00:00:00 2001 From: azharudd Date: Tue, 19 Aug 2025 15:25:09 -0700 Subject: [PATCH 081/135] [jenkins] build.py: Print machine info (#560) --- zorg/jenkins/build.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/zorg/jenkins/build.py b/zorg/jenkins/build.py index 9683470d1..c06500649 100644 --- a/zorg/jenkins/build.py +++ b/zorg/jenkins/build.py @@ -1019,6 +1019,14 @@ def run_ws(cmd, env=None, sudo=False, err_okay=False): return run_cmd(conf.workspace, cmd, env, sudo=sudo, err_okay=err_okay) +def print_machine_info(): + header("Machine Info") + run_ws(["sw_vers"]) + run_ws(["xcodebuild", "-version"]) + run_ws(["cmake", "--version"]) + footer() + +
def parse_args(): """Get the command line arguments, and make sure they are correct.""" @@ -1088,6 +1096,7 @@ def main(): args = parse_args() conf = Configuration(args) + print_machine_info() create_builddirs() try: if args.build_type == 'clang': From 6188ab56b81a7890f386932821824c69cf2ac317 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 01:14:58 +0000 Subject: [PATCH 082/135] [CI] Do not allow git to change line endings on Windows This patch sets core.autocrlf to false on Windows for the premerge buildbots. Otherwise we get a bunch of test failures due to line ending differences. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 77cc5df60..2310a5058 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -124,7 +124,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): bash_commands = [ "set -ex", "cd C:/_work", - "git clone --depth 100 https://github.com/llvm/llvm-project", + "git clone --config core.autocrlf=false --depth 100 https://github.com/llvm/llvm-project", "cd llvm-project", f"git checkout {commit_sha}", "export POSTCOMMIT_CI=1", From f03cc99e5814796ceda58479bd2dc6292dbf1cd0 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 07:39:30 -0700 Subject: [PATCH 083/135] [CI] Add more premerge buildbot workers (#559) The original intention for the premerge workers was that they could handle multiple builds at the same time. However, buildbot does not support a worker running more than one job at a time for a single builder, which is how we have things set up. Switch to having three workers per platform per cluster rather than one worker with max_builds set to 3. 
--- buildbot/osuosl/master/config/builders.py | 76 +++++++++++++++++------ buildbot/osuosl/master/config/workers.py | 16 +++-- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 559ff864c..f5d16ef66 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3545,24 +3545,64 @@ # postcommit (after changes have landed in main). The configuration for # running these checks premerge exists in the monorepo inside the # .github/workflows/premerge.yaml file. - {'name': "premerge-monolithic-linux", - 'workernames': ["premerge-us-central-linux", "premerge-us-west-linux"], - 'builddir': "premerge-monolithic-linux", - 'factory': AnnotatedBuilder.getAnnotatedBuildFactory( - script="premerge/dispatch_job.py", - checkout_llvm_sources=False, - extra_args=["Linux"], - depends_on_projects=["bolt", "clang", "clang-tools-extra", "compiler-rt", "flang", "flang-rt", "libc", "libclc", "lld", "lldb", "llvm", "mlir", "polly"])}, - - {'name': "premerge-monolithic-windows", - 'workernames': ["premerge-us-central-windows", "premerge-us-west-windows"], - 'builddir': "premerge-monolithic-windows", - 'factory': AnnotatedBuilder.getAnnotatedBuildFactory( - script="premerge/dispatch_job.py", - checkout_llvm_sources=False, - extra_args=["Windows"], - depends_on_projects=["clang-tools-extra", "clang", "libclc", "lld", "llvm", "mlir", "polly"])}, - + { + "name": "premerge-monolithic-linux", + "workernames": [ + "premerge-us-central-linux-b1", + "premerge-us-central-linux-b2", + "premerge-us-central-linux-b3", + "premerge-us-west-linux-b1", + "premerge-us-west-linux-b2", + "premerge-us-west-linux-b3", + ], + "builddir": "premerge-monolithic-linux", + "factory": AnnotatedBuilder.getAnnotatedBuildFactory( + script="premerge/dispatch_job.py", + checkout_llvm_sources=False, + extra_args=["Linux"], + depends_on_projects=[ + "bolt", + "clang", + 
"clang-tools-extra", + "compiler-rt", + "flang", + "flang-rt", + "libc", + "libclc", + "lld", + "lldb", + "llvm", + "mlir", + "polly", + ], + ), + }, + { + "name": "premerge-monolithic-windows", + "workernames": [ + "premerge-us-central-windows-b1", + "premerge-us-central-windows-b2", + "premerge-us-central-windows-b3", + "premerge-us-west-windows-b1", + "premerge-us-west-windows-b2", + "premerge-us-west-windows-b3", + ], + "builddir": "premerge-monolithic-windows", + "factory": AnnotatedBuilder.getAnnotatedBuildFactory( + script="premerge/dispatch_job.py", + checkout_llvm_sources=False, + extra_args=["Windows"], + depends_on_projects=[ + "clang-tools-extra", + "clang", + "libclc", + "lld", + "llvm", + "mlir", + "polly", + ], + ), + }, # Builders for the profcheck configuration # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure # that profile information is propagated correctly. diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 5a2dc3352..e903088ce 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -408,10 +408,18 @@ def get_all(): # postcommit (after changes have landed in main). The workers for the # infrastructure that runs the checks premerge are setup through Github # Actions under the premerge/ folder in llvm-zorg. 
- create_worker("premerge-us-central-linux", properties={'jobs': 64}, max_builds=3), - create_worker("premerge-us-central-windows", properties={'jobs': 64}, max_builds=3), - create_worker("premerge-us-west-linux", properties={'jobs': 64}, max_builds=3), - create_worker("premerge-us-west-windows", properties={'jobs': 64}, max_builds=3), + create_worker("premerge-us-central-linux-b1", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-central-linux-b2", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-central-linux-b3", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-central-windows-b1", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-central-windows-b2", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-central-windows-b3", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-west-linux-b1", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-west-linux-b2", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-west-linux-b3", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-west-windows-b1", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-west-windows-b2", properties={'jobs': 64}, max_builds=1), + create_worker("premerge-us-west-windows-b3", properties={'jobs': 64}, max_builds=1), # Workers for the profcheck configuration # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure From 3adb665722c42f77a1285cc13051a77e666a39fd Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 15:20:55 +0000 Subject: [PATCH 084/135] feedback --- buildbot/google/scripts/profcheck.sh | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 buildbot/google/scripts/profcheck.sh diff --git a/buildbot/google/scripts/profcheck.sh b/buildbot/google/scripts/profcheck.sh new file mode 100644 index 000000000..9703c69ba --- /dev/null +++ 
b/buildbot/google/scripts/profcheck.sh @@ -0,0 +1,48 @@ +set -ex + +apt-get update +apt-get install -y python3 python3-pip cmake ninja-build git ccache lsb-release wget software-properties-common gnupg wget +pip3 install --break-system-packages buildbot-worker==3.11.7 + +bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" 20 +ln -sf /usr/bin/clang-20 /usr/bin/cc +ln -sf /usr/bin/clang++-20 /usr/bin/c++ +ln -sf /usr/bin/ld.lld-20 /usr/bin/ld + +rm -rf /b +BOT_DIR=/b +SERVER_PORT=9994 +WORKER_NAME="$(hostname)" +WORKER_PASSWORD="$(gsutil cat gs://sanitizer-buildbot/$(hostname)-password)" + +userdel buildbot | true +groupadd -f buildbot +useradd buildbot -g buildbot -m -d /b/home +chown buildbot:buildbot $BOT_DIR + +sudo -u buildbot buildbot-worker create-worker -f --allow-shutdown=signal $BOT_DIR lab.llvm.org:$SERVER_PORT \ + "${WORKER_NAME}" "${WORKER_PASSWORD}" + +{ + echo "Mircea Trofin " + echo "Aiden Grossman " +} > $BOT_DIR/info/admin + +{ + echo "To reproduce locally, use a standard CMake invocation with -DLLVM_ENABLE_PROFCHECK=ON and -DLLVM_LIT_ARGS='--exclude-xfail'" + echo "Example:" + echo "cmake -GNinja" + echo " -DCMAKE_BUILD_TYPE=Release" + echo " -DLLVM_ENABLE_ASSERTIONS=ON" + echo " -DLLVM_LIT_ARGS='--exclude-xfail'" + echo " -DLLVM_ENABLE_PROFCHECK=ON" + echo + uname -a | head -n1 + date + cmake --version | head -n1 + c++ --version | head -n1 + ld --version | head -n1 + lscpu +} > $BOT_DIR/info/host + +sudo -u buildbot buildbot-worker start $BOT_DIR From b1fcfbbb8a816710eca4665de3f8ef6fc13c9bd6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 15:21:58 +0000 Subject: [PATCH 085/135] Remove errantly committed buildbot script My editor kept the file in cache and saved when I checked out a different branch and I was still on main. 
--- buildbot/google/scripts/profcheck.sh | 48 ---------------------------- 1 file changed, 48 deletions(-) delete mode 100644 buildbot/google/scripts/profcheck.sh diff --git a/buildbot/google/scripts/profcheck.sh b/buildbot/google/scripts/profcheck.sh deleted file mode 100644 index 9703c69ba..000000000 --- a/buildbot/google/scripts/profcheck.sh +++ /dev/null @@ -1,48 +0,0 @@ -set -ex - -apt-get update -apt-get install -y python3 python3-pip cmake ninja-build git ccache lsb-release wget software-properties-common gnupg wget -pip3 install --break-system-packages buildbot-worker==3.11.7 - -bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" 20 -ln -sf /usr/bin/clang-20 /usr/bin/cc -ln -sf /usr/bin/clang++-20 /usr/bin/c++ -ln -sf /usr/bin/ld.lld-20 /usr/bin/ld - -rm -rf /b -BOT_DIR=/b -SERVER_PORT=9994 -WORKER_NAME="$(hostname)" -WORKER_PASSWORD="$(gsutil cat gs://sanitizer-buildbot/$(hostname)-password)" - -userdel buildbot | true -groupadd -f buildbot -useradd buildbot -g buildbot -m -d /b/home -chown buildbot:buildbot $BOT_DIR - -sudo -u buildbot buildbot-worker create-worker -f --allow-shutdown=signal $BOT_DIR lab.llvm.org:$SERVER_PORT \ - "${WORKER_NAME}" "${WORKER_PASSWORD}" - -{ - echo "Mircea Trofin " - echo "Aiden Grossman " -} > $BOT_DIR/info/admin - -{ - echo "To reproduce locally, use a standard CMake invocation with -DLLVM_ENABLE_PROFCHECK=ON and -DLLVM_LIT_ARGS='--exclude-xfail'" - echo "Example:" - echo "cmake -GNinja" - echo " -DCMAKE_BUILD_TYPE=Release" - echo " -DLLVM_ENABLE_ASSERTIONS=ON" - echo " -DLLVM_LIT_ARGS='--exclude-xfail'" - echo " -DLLVM_ENABLE_PROFCHECK=ON" - echo - uname -a | head -n1 - date - cmake --version | head -n1 - c++ --version | head -n1 - ld --version | head -n1 - lscpu -} > $BOT_DIR/info/host - -sudo -u buildbot buildbot-worker start $BOT_DIR From 7c6f6516bbb49b773b7eec9812eab84dd4c66820 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 08:25:36 -0700 Subject: [PATCH 086/135] Add profcheck init script (#561) 
This is to be run on VM startup to setup the machine and start the buildbot. --- buildbot/google/scripts/profcheck.sh | 48 ++++++++++++++++++++++++++++ premerge/main.tf | 8 ++--- 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 buildbot/google/scripts/profcheck.sh diff --git a/buildbot/google/scripts/profcheck.sh b/buildbot/google/scripts/profcheck.sh new file mode 100644 index 000000000..9703c69ba --- /dev/null +++ b/buildbot/google/scripts/profcheck.sh @@ -0,0 +1,48 @@ +set -ex + +apt-get update +apt-get install -y python3 python3-pip cmake ninja-build git ccache lsb-release wget software-properties-common gnupg wget +pip3 install --break-system-packages buildbot-worker==3.11.7 + +bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" 20 +ln -sf /usr/bin/clang-20 /usr/bin/cc +ln -sf /usr/bin/clang++-20 /usr/bin/c++ +ln -sf /usr/bin/ld.lld-20 /usr/bin/ld + +rm -rf /b +BOT_DIR=/b +SERVER_PORT=9994 +WORKER_NAME="$(hostname)" +WORKER_PASSWORD="$(gsutil cat gs://sanitizer-buildbot/$(hostname)-password)" + +userdel buildbot | true +groupadd -f buildbot +useradd buildbot -g buildbot -m -d /b/home +chown buildbot:buildbot $BOT_DIR + +sudo -u buildbot buildbot-worker create-worker -f --allow-shutdown=signal $BOT_DIR lab.llvm.org:$SERVER_PORT \ + "${WORKER_NAME}" "${WORKER_PASSWORD}" + +{ + echo "Mircea Trofin " + echo "Aiden Grossman " +} > $BOT_DIR/info/admin + +{ + echo "To reproduce locally, use a standard CMake invocation with -DLLVM_ENABLE_PROFCHECK=ON and -DLLVM_LIT_ARGS='--exclude-xfail'" + echo "Example:" + echo "cmake -GNinja" + echo " -DCMAKE_BUILD_TYPE=Release" + echo " -DLLVM_ENABLE_ASSERTIONS=ON" + echo " -DLLVM_LIT_ARGS='--exclude-xfail'" + echo " -DLLVM_ENABLE_PROFCHECK=ON" + echo + uname -a | head -n1 + date + cmake --version | head -n1 + c++ --version | head -n1 + ld --version | head -n1 + lscpu +} > $BOT_DIR/info/host + +sudo -u buildbot buildbot-worker start $BOT_DIR diff --git a/premerge/main.tf b/premerge/main.tf index 
2371ebfb6..c9a542289 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -171,9 +171,9 @@ module "premerge_cluster_us_central_resources" { linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.linux_object_cache_gcp_service_account_email windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_gcp_service_account_email github_arc_version = "0.12.1" - linux_buildbot_name = "premerge-us-central-linux" + linux_buildbot_name = "premerge-us-central-linux-b1" linux_buildbot_password = data.google_secret_manager_secret_version.us_central_linux_buildbot_password.secret_data - windows_buildbot_name = "premerge-us-central-windows" + windows_buildbot_name = "premerge-us-central-windows-b1" windows_buildbot_password = data.google_secret_manager_secret_version.us_central_windows_buildbot_password.secret_data linux_object_cache_buildbot_service_account_email = module.premerge_cluster_us_central.linux_object_cache_buildbot_service_account_email windows_2022_object_cache_buildbot_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_buildbot_service_account_email @@ -198,9 +198,9 @@ module "premerge_cluster_us_west_resources" { linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.linux_object_cache_gcp_service_account_email windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_gcp_service_account_email github_arc_version = "0.12.1" - linux_buildbot_name = "premerge-us-west-linux" + linux_buildbot_name = "premerge-us-west-linux-b1" linux_buildbot_password = data.google_secret_manager_secret_version.us_west_linux_buildbot_password.secret_data - windows_buildbot_name = "premerge-us-west-windows" + windows_buildbot_name = "premerge-us-west-windows-b1" windows_buildbot_password = data.google_secret_manager_secret_version.us_west_windows_buildbot_password.secret_data 
linux_object_cache_buildbot_service_account_email = module.premerge_cluster_us_west.linux_object_cache_buildbot_service_account_email windows_2022_object_cache_buildbot_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_buildbot_service_account_email From 7cbdfaebbba5a5c2b67cced725d95a24352de88a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 18:08:43 +0000 Subject: [PATCH 087/135] [CI] Update premerge buildbot deployments We added more buildbots and renamed the existing ones, so update the TF to reflect this. --- premerge/main.tf | 8 +++--- premerge/premerge_resources/main.tf | 32 +++++++++++++++++++++--- premerge/premerge_resources/variables.tf | 4 +-- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/premerge/main.tf b/premerge/main.tf index c9a542289..cc6ea3e23 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -171,9 +171,9 @@ module "premerge_cluster_us_central_resources" { linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.linux_object_cache_gcp_service_account_email windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_central.windows_2022_object_cache_gcp_service_account_email github_arc_version = "0.12.1" - linux_buildbot_name = "premerge-us-central-linux-b1" + linux_buildbot_name_template = "premerge-us-central-linux" linux_buildbot_password = data.google_secret_manager_secret_version.us_central_linux_buildbot_password.secret_data - windows_buildbot_name = "premerge-us-central-windows-b1" + windows_buildbot_name_template = "premerge-us-central-windows" windows_buildbot_password = data.google_secret_manager_secret_version.us_central_windows_buildbot_password.secret_data linux_object_cache_buildbot_service_account_email = module.premerge_cluster_us_central.linux_object_cache_buildbot_service_account_email windows_2022_object_cache_buildbot_service_account_email = 
module.premerge_cluster_us_central.windows_2022_object_cache_buildbot_service_account_email @@ -198,9 +198,9 @@ module "premerge_cluster_us_west_resources" { linux_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.linux_object_cache_gcp_service_account_email windows_2022_object_cache_gcp_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_gcp_service_account_email github_arc_version = "0.12.1" - linux_buildbot_name = "premerge-us-west-linux-b1" + linux_buildbot_name_template = "premerge-us-west-linux" linux_buildbot_password = data.google_secret_manager_secret_version.us_west_linux_buildbot_password.secret_data - windows_buildbot_name = "premerge-us-west-windows-b1" + windows_buildbot_name_template = "premerge-us-west-windows" windows_buildbot_password = data.google_secret_manager_secret_version.us_west_windows_buildbot_password.secret_data linux_object_cache_buildbot_service_account_email = module.premerge_cluster_us_west.linux_object_cache_buildbot_service_account_email windows_2022_object_cache_buildbot_service_account_email = module.premerge_cluster_us_west.windows_2022_object_cache_buildbot_service_account_email diff --git a/premerge/premerge_resources/main.tf b/premerge/premerge_resources/main.tf index 6fd0629bf..5655bdcbb 100644 --- a/premerge/premerge_resources/main.tf +++ b/premerge/premerge_resources/main.tf @@ -341,8 +341,20 @@ resource "kubernetes_service_account" "linux_buildbot_gcs_ksa" { depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot] } -resource "kubernetes_manifest" "linux_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.linux_buildbot_name, buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) +resource "kubernetes_manifest" "linux_buildbot_b1_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : 
format("%s-b1", var.linux_buildbot_name_template), buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] +} + +resource "kubernetes_manifest" "linux_buildbot_b2_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : format("%s-b2", var.linux_buildbot_name_template), buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) + + depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] +} + +resource "kubernetes_manifest" "linux_buildbot_b3_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : format("%s-b3", var.linux_buildbot_name_template), buildbot_namespace : "llvm-premerge-linux-buildbot", secret_name : "linux-buildbot-password", buildbot_region : var.cluster_name })) depends_on = [kubernetes_namespace.llvm_premerge_linux_buildbot, kubernetes_secret.linux_buildbot_password] } @@ -404,8 +416,20 @@ resource "kubernetes_service_account" "windows_2022_buildbot_gcs_ksa" { depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot] } -resource "kubernetes_manifest" "windows_buildbot_deployment" { - manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : var.windows_buildbot_name, buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : var.cluster_name })) +resource "kubernetes_manifest" "windows_buildbot_b1_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : format("%s-b1", var.windows_buildbot_name_template), buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : 
var.cluster_name })) + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] +} + +resource "kubernetes_manifest" "windows_buildbot_b2_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : format("%s-b2", var.windows_buildbot_name_template), buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : var.cluster_name })) + + depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] +} + +resource "kubernetes_manifest" "windows_buildbot_b3_deployment" { + manifest = yamldecode(templatefile("buildbot_deployment.yaml", { buildbot_name : format("%s-b3", var.windows_buildbot_name_template), buildbot_namespace : "llvm-premerge-windows-2022-buildbot", secret_name : "windows-buildbot-password", buildbot_region : var.cluster_name })) depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_buildbot, kubernetes_secret.windows_2022_buildbot_password] } diff --git a/premerge/premerge_resources/variables.tf b/premerge/premerge_resources/variables.tf index 0528b26ea..eae2394fd 100644 --- a/premerge/premerge_resources/variables.tf +++ b/premerge/premerge_resources/variables.tf @@ -104,7 +104,7 @@ variable "windows_2022_object_cache_gcp_service_account_email" { type = string } -variable "linux_buildbot_name" { +variable "linux_buildbot_name_template" { description = "The name of the linux buildbot that will run tests postcommit." type = string } @@ -114,7 +114,7 @@ variable "linux_buildbot_password" { type = string } -variable "windows_buildbot_name" { +variable "windows_buildbot_name_template" { description = "The name of the windows buildbot that will run tests postcommit." 
type = string } From e50bb4deb9c6d1b9428672977d44f31d0c2183ca Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Wed, 20 Aug 2025 16:52:46 -0400 Subject: [PATCH 088/135] [CI] Use iam_member instead of iam_binding for BQ JobUser role (#562) This change makes it so the `bigquery.jobUser` role can be granted to new members while preserving grants for past members. `google_project_iam_binding` is authoritative, and revokes role access for all other members who are not listed in the resource definition when running `terraform apply`. This is problematic as we now have another internal service account that needs access to `roles/bigquery.jobUser`, and its membership will continue to be revoked as long as we define a binding here. --- premerge/main.tf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/premerge/main.tf b/premerge/main.tf index cc6ea3e23..8fbbb8988 100644 --- a/premerge/main.tf +++ b/premerge/main.tf @@ -262,13 +262,10 @@ resource "google_service_account" "operational_metrics_gsa" { display_name = "Operational Metrics GSA" } -resource "google_project_iam_binding" "bigquery_jobuser_binding" { +resource "google_project_iam_member" "operational_metrics_gsa_bq_jobuser_member" { project = google_service_account.operational_metrics_gsa.project role = "roles/bigquery.jobUser" - - members = [ - "serviceAccount:${google_service_account.operational_metrics_gsa.email}", - ] + member = "serviceAccount:${google_service_account.operational_metrics_gsa.email}" depends_on = [google_service_account.operational_metrics_gsa] } From 361225bdbcd634ab5584f9722cb118f736855950 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 20 Aug 2025 18:49:49 -0700 Subject: [PATCH 089/135] [ProfCheck] Switch to annotated builder (#563) This allows setting the LIT_XFAIL environment variable before running ninja which enables excluding tests from a file in the LLVM tree. I do not believe this is possible outside of an annotated builder. 
--- buildbot/osuosl/master/config/builders.py | 22 +++++++----------- zorg/buildbot/builders/annotated/profcheck.sh | 23 +++++++++++++++++++ 2 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 zorg/buildbot/builders/annotated/profcheck.sh diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index f5d16ef66..cadba48ec 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3606,20 +3606,14 @@ # Builders for the profcheck configuration # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure # that profile information is propagated correctly. - {'name' : "profcheck", - 'workernames' : ["profcheck-b1", "profcheck-b2"], - 'builddir': "profcheck-build", - 'factory' : UnifiedTreeBuilder.getCmakeWithNinjaBuildFactory( - clean=True, - depends_on_projects=['llvm'], - extra_configure_args=[ - "-DCMAKE_BUILD_TYPE=Release", - "-DCMAKE_C_COMPILER_LAUNCHER=ccache", - "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", - "-DLLVM_ENABLE_ASSERTIONS=ON", - "-DLLVM_LIT_ARGS='--exclude-xfail'", - "-DLLVM_ENABLE_PROFCHECK=ON", - ])}, + { + "name": "profcheck", + "workernames": ["profcheck-b1", "profcheck-b2"], + "builddir": "profcheck-build", + "factory": AnnotatedBuilder.getAnnotatedBuildFactory( + script="profcheck.sh", clean=True, depends_on_projects=["llvm"] + ), + }, ] # LLDB remote-linux builder env variables. 
diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh new file mode 100644 index 000000000..23cd381d1 --- /dev/null +++ b/zorg/buildbot/builders/annotated/profcheck.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -ex + +rm -rf build +mkdir build +cd build + +echo @@@CMake@@@ + +cmake -GNinja \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DLLVM_LIT_ARGS='--exclude-xfail' \ + -DLLVM_ENABLE_PROFCHECK=ON \ + ../llvm + +echo @@@Ninja@@@ + +export LIT_XFAIL="$(cat ../llvm/utils/profcheck-xfail.txt | tr '\n' ';')" +ninja check-llvm From 0195fe053f40d329aca246b265427227d4c2223e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 13:02:36 +0000 Subject: [PATCH 090/135] [ProfCheck] Set script_interpreter to None The annotated builder is a shell script, not a python script. Trying to run it with python results in an error. --- buildbot/osuosl/master/config/builders.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index cadba48ec..734e486eb 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3611,7 +3611,10 @@ "workernames": ["profcheck-b1", "profcheck-b2"], "builddir": "profcheck-build", "factory": AnnotatedBuilder.getAnnotatedBuildFactory( - script="profcheck.sh", clean=True, depends_on_projects=["llvm"] + script="profcheck.sh", + clean=True, + depends_on_projects=["llvm"], + script_interpreter=None, ), }, ] From c3087cb7d0e755b7f9d655b5446ad73f210598dd Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 07:58:15 -0700 Subject: [PATCH 091/135] [ProfCheck] Disable collapsing requests (#566) The build is fast enough that we have enough capacity for this. 
Disabling request collapsing ensures that the blame list is limited to a single commit. --- buildbot/osuosl/master/config/builders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 734e486eb..70473c4c4 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3609,6 +3609,7 @@ { "name": "profcheck", "workernames": ["profcheck-b1", "profcheck-b2"], + "collapseRequests": False, "builddir": "profcheck-build", "factory": AnnotatedBuilder.getAnnotatedBuildFactory( script="profcheck.sh", From 5f7706a7d265935b40de59a2e453cb0992b18ce3 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 08:22:52 -0700 Subject: [PATCH 092/135] [CI] Disable collapsing requests on premerge builders (#565) We have enough capacity on the premerge builders to not collapse requests (test multiple commits at the same time). This ensures blame lists are always limited to a single user. 
--- buildbot/osuosl/master/config/builders.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 70473c4c4..38794192b 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3555,6 +3555,7 @@ "premerge-us-west-linux-b2", "premerge-us-west-linux-b3", ], + "collapseRequests": False, "builddir": "premerge-monolithic-linux", "factory": AnnotatedBuilder.getAnnotatedBuildFactory( script="premerge/dispatch_job.py", @@ -3579,6 +3580,7 @@ }, { "name": "premerge-monolithic-windows", + "collapseRequests": False, "workernames": [ "premerge-us-central-windows-b1", "premerge-us-central-windows-b2", From d3d37bda0d72f226f57294a5b3651351de60c6ff Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 17:22:42 +0000 Subject: [PATCH 093/135] [ProfCheck] Make annotated builder script executable Since we are not using a script interpreter this fails to run when directly invoked due to the file mode being set incorrectly. --- zorg/buildbot/builders/annotated/profcheck.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 zorg/buildbot/builders/annotated/profcheck.sh diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh old mode 100644 new mode 100755 From 18eee1c5ca9c37e9b3f7d22fa1701cf4628310f2 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 17:31:57 +0000 Subject: [PATCH 094/135] [ProfCheck] Fix paths for profcheck annotated script Buildbot does the checkout in BUILDBOT_ROOT/llvm-project and then does the build in BUILDBOT_ROOT/build. Make the script reflect this. 
--- zorg/buildbot/builders/annotated/profcheck.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh index 23cd381d1..2533b7cef 100755 --- a/zorg/buildbot/builders/annotated/profcheck.sh +++ b/zorg/buildbot/builders/annotated/profcheck.sh @@ -2,10 +2,6 @@ set -ex -rm -rf build -mkdir build -cd build - echo @@@CMake@@@ cmake -GNinja \ @@ -15,9 +11,10 @@ cmake -GNinja \ -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ -DLLVM_LIT_ARGS='--exclude-xfail' \ -DLLVM_ENABLE_PROFCHECK=ON \ - ../llvm + ../llvm-project/llvm echo @@@Ninja@@@ export LIT_XFAIL="$(cat ../llvm/utils/profcheck-xfail.txt | tr '\n' ';')" ninja check-llvm + From 74be67b7ee9bb53110ff0f7734789b19a3e5806c Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 17:34:42 +0000 Subject: [PATCH 095/135] [ProfCheck] Use ccache sccache is not installed on the builder. --- zorg/buildbot/builders/annotated/profcheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh index 2533b7cef..a42bc6266 100755 --- a/zorg/buildbot/builders/annotated/profcheck.sh +++ b/zorg/buildbot/builders/annotated/profcheck.sh @@ -8,7 +8,7 @@ cmake -GNinja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DLLVM_LIT_ARGS='--exclude-xfail' \ -DLLVM_ENABLE_PROFCHECK=ON \ ../llvm-project/llvm From 2ed6506c046aa4fee3990dd69a355e5fba1ffc41 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 10:40:07 -0700 Subject: [PATCH 096/135] [CI] Move ops container under premerge (#494) Top level directories should probably be reserved for majorish efforts. 
I think the ops-container definitions make a lot more sense to have under the premerge folder even if they are not perfectly aligned. --- .../workflows/build-operations-metrics-container.yml | 10 +++++----- .../ops-container/Dockerfile | 0 .../ops-container/process_llvm_commits.py | 0 .../ops-container/requirements.lock.txt | 0 .../ops-container/requirements.txt | 0 5 files changed, 5 insertions(+), 5 deletions(-) rename {llvm-ops-metrics => premerge}/ops-container/Dockerfile (100%) rename {llvm-ops-metrics => premerge}/ops-container/process_llvm_commits.py (100%) rename {llvm-ops-metrics => premerge}/ops-container/requirements.lock.txt (100%) rename {llvm-ops-metrics => premerge}/ops-container/requirements.txt (100%) diff --git a/.github/workflows/build-operations-metrics-container.yml b/.github/workflows/build-operations-metrics-container.yml index c938f373f..c331e70aa 100644 --- a/.github/workflows/build-operations-metrics-container.yml +++ b/.github/workflows/build-operations-metrics-container.yml @@ -9,11 +9,11 @@ on: - main paths: - .github/workflows/build-operations-metrics-container.yml - - 'llvm-ops-metrics/ops-container/**' + - 'premerge/ops-container/**' pull_request: paths: - .github/workflows/build-operations-metrics-container.yml - - 'llvm-ops-metrics/ops-container/**' + - 'premerge/ops-container/**' jobs: build-operations-metrics-container: @@ -27,7 +27,7 @@ jobs: - name: Checkout LLVM Zorg uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - sparse-checkout: llvm-ops-metrics/ops-container + sparse-checkout: premerge/ops-container - name: Write Variables id: vars run: | @@ -37,9 +37,9 @@ jobs: echo "container-name-tag=$container_name:$tag" >> $GITHUB_OUTPUT echo "container-filename=$(echo $container_name:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT - name: Build Container - working-directory: ./llvm-ops-metrics/ops-container + working-directory: premerge/ops-container run: | - podman build -t ${{ 
steps.vars.outputs.container-name-tag }} -f Dockerfile . + podman build -t ${{ steps.vars.outputs.container-name-tag }} . # Save the container so we have it in case the push fails. This also # allows us to separate the push step into a different job so we can # maintain minimal permissions while building the container. diff --git a/llvm-ops-metrics/ops-container/Dockerfile b/premerge/ops-container/Dockerfile similarity index 100% rename from llvm-ops-metrics/ops-container/Dockerfile rename to premerge/ops-container/Dockerfile diff --git a/llvm-ops-metrics/ops-container/process_llvm_commits.py b/premerge/ops-container/process_llvm_commits.py similarity index 100% rename from llvm-ops-metrics/ops-container/process_llvm_commits.py rename to premerge/ops-container/process_llvm_commits.py diff --git a/llvm-ops-metrics/ops-container/requirements.lock.txt b/premerge/ops-container/requirements.lock.txt similarity index 100% rename from llvm-ops-metrics/ops-container/requirements.lock.txt rename to premerge/ops-container/requirements.lock.txt diff --git a/llvm-ops-metrics/ops-container/requirements.txt b/premerge/ops-container/requirements.txt similarity index 100% rename from llvm-ops-metrics/ops-container/requirements.txt rename to premerge/ops-container/requirements.txt From 513d1e782390df322a2530be8e3b19f713d618c9 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 17:52:01 +0000 Subject: [PATCH 097/135] [ProfCheck] Fix test exclusion path Same issue as in the previous profcheck commit. 
--- zorg/buildbot/builders/annotated/profcheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh index a42bc6266..c2d58360b 100755 --- a/zorg/buildbot/builders/annotated/profcheck.sh +++ b/zorg/buildbot/builders/annotated/profcheck.sh @@ -15,6 +15,6 @@ cmake -GNinja \ echo @@@Ninja@@@ -export LIT_XFAIL="$(cat ../llvm/utils/profcheck-xfail.txt | tr '\n' ';')" +export LIT_XFAIL="$(cat ../llvm-project/llvm/utils/profcheck-xfail.txt | tr '\n' ';')" ninja check-llvm From 8c4951447fd135ad79805d02564e0cf208b5f7a4 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 21 Aug 2025 20:03:46 +0000 Subject: [PATCH 098/135] [ProfCheck] Use Build Step annotations Otherwise the preamble takes forever to propagate and the build takes forever to complete despite finishing quite fast on the bot. This is what I intended originally but did not double check the annotation format. --- zorg/buildbot/builders/annotated/profcheck.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh index c2d58360b..a040436f7 100755 --- a/zorg/buildbot/builders/annotated/profcheck.sh +++ b/zorg/buildbot/builders/annotated/profcheck.sh @@ -2,7 +2,7 @@ set -ex -echo @@@CMake@@@ +echo @@@BUILD_STEP CMake@@@ cmake -GNinja \ -DCMAKE_BUILD_TYPE=Release \ @@ -13,8 +13,7 @@ cmake -GNinja \ -DLLVM_ENABLE_PROFCHECK=ON \ ../llvm-project/llvm -echo @@@Ninja@@@ +echo @@@BUILD_STEP Ninja@@@ export LIT_XFAIL="$(cat ../llvm-project/llvm/utils/profcheck-xfail.txt | tr '\n' ';')" ninja check-llvm - From 6e21c15a01137c34965b53d5ecc27f15f7097f97 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 22 Aug 2025 10:41:48 +0200 Subject: [PATCH 099/135] Also notify @cmtice for lldb-x86_64-debian failures (#556) --- buildbot/osuosl/master/config/status.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index 3d3e64b35..f9a6210b1 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -242,7 +242,7 @@ def getReporters(): reporters.MailNotifier( fromaddr = status_email_fromaddr, sendToInterestedUsers = False, - extraRecipients = ["labath@google.com"], + extraRecipients = ["labath@google.com", "cmtice@google.com"], generators = [ utils.LLVMDefaultBuildStatusGenerator( builders = ["lldb-x86_64-debian"]) From 0e553221c88617d1fbe1ead6bb7b95fa05ffc950 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 22 Aug 2025 10:32:35 -0700 Subject: [PATCH 100/135] [ProfCheck] Add email notifiers (#569) Send email to the group on new failures or when the buildbot is missing. --- buildbot/osuosl/master/config/status.py | 10 +++++++++- buildbot/osuosl/master/config/workers.py | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index f9a6210b1..3bcb9c331 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -568,7 +568,15 @@ def getReporters(): builders = [ "llvm-x86_64-debugify-coverage"]) ]), - + reporters.MailNotifier( + fromaddr = status_email_fromaddr, + sendToInterestedUsers = False, + extraRecipients = ["profcheck-buildbot@google.com"], + generators = [ + utils.LLVMDefaultBuildStatusGenerator( + builders = [ + "profcheck"]) + ]), ]) return r diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index e903088ce..5116adf17 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -424,9 +424,18 @@ def get_all(): # Workers for the profcheck configuration # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure # that profile information is propagated correctly. 
- create_worker("profcheck-b1", properties={'jobs': 64}, max_builds=1), - create_worker("profcheck-b2", properties={'jobs': 64}, max_builds=1), - + create_worker( + "profcheck-b1", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=["profcheck-buildbot@google.com"], + ), + create_worker( + "profcheck-b2", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=["profcheck-buildbot@google.com"], + ), # FIXME: A placeholder for annoying worker which nobody could stop. # adding it avoid logs spammed by failed authentication for that worker. create_worker("mlir-ubuntu-worker0"), From 0884c05932267aa52182daa5f652c3e3a6ffa5fe Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 22 Aug 2025 13:56:57 -0700 Subject: [PATCH 101/135] [CI] Add notifications for premerge buildbots (#568) This is so that the appropriate people are getting notifications when there are failures related to the premerge buildbots. Most of the time no action is needed as build failures happen reasonably often and are usually fixed quickly. Having the notifications (especially for missing workers) helps ensure that the infra is not down/broken for a while. Currently adding the premerge rotation, myself and Caroline. We might eventually want to only have the premerge rotation on this, but as we stabilize the infrastructure, having more people get notifications should be helpful. 
--- buildbot/osuosl/master/config/status.py | 17 +++ buildbot/osuosl/master/config/workers.py | 133 ++++++++++++++++++++--- 2 files changed, 137 insertions(+), 13 deletions(-) diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py index 3bcb9c331..ffe40d0e4 100644 --- a/buildbot/osuosl/master/config/status.py +++ b/buildbot/osuosl/master/config/status.py @@ -577,6 +577,23 @@ def getReporters(): builders = [ "profcheck"]) ]), + reporters.MailNotifier( + fromaddr=status_email_fromaddr, + sendToInterestedUsers=False, + extraRecipients=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + generators=[ + utils.LLVMDefaultBuildStatusGenerator( + subject="Premerge Buildbot Failure: {{ buildername }}", + builders=[ + "premerge-monolithic-linux", + "premerge-monolithic-windows", + ], + ) + ]), ]) return r diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 5116adf17..9775348d6 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -408,19 +408,126 @@ def get_all(): # postcommit (after changes have landed in main). The workers for the # infrastructure that runs the checks premerge are setup through Github # Actions under the premerge/ folder in llvm-zorg. 
- create_worker("premerge-us-central-linux-b1", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-central-linux-b2", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-central-linux-b3", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-central-windows-b1", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-central-windows-b2", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-central-windows-b3", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-west-linux-b1", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-west-linux-b2", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-west-linux-b3", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-west-windows-b1", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-west-windows-b2", properties={'jobs': 64}, max_builds=1), - create_worker("premerge-us-west-windows-b3", properties={'jobs': 64}, max_builds=1), - + create_worker( + "premerge-us-central-linux-b1", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-central-linux-b2", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-central-linux-b3", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-central-windows-b1", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-central-windows-b2", + properties={"jobs": 64}, + 
max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-central-windows-b3", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-west-linux-b1", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-west-linux-b2", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-west-linux-b3", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-west-windows-b1", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-west-windows-b2", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), + create_worker( + "premerge-us-west-windows-b3", + properties={"jobs": 64}, + max_builds=1, + notify_on_missing=[ + "llvm-presubmit-infra@google.com", + "aidengrossman@google.com", + "cmtice@google.com", + ], + ), # Workers for the profcheck configuration # These workers run builds with LLVM_ENABLE_PROFCHECK=ON to ensure # that profile information is propagated correctly. 
From f96946b1738f9d4cb21313f2745cc8100e55f00f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 24 Aug 2025 12:03:16 -0700 Subject: [PATCH 102/135] [CI] Add libcxx/libcxxabi/libunwind to premerge dependent projects (#572) Otherwise we end up with commits like https://github.com/llvm/llvm-project/commit/f5e687d7bf49cd9fe38ba7acdeb52d4f30468dee that break the postcommit bot, but do not actually show up when looking through the builds, making manual bisecting much more difficult. --- buildbot/osuosl/master/config/builders.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 38794192b..576461baa 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3575,6 +3575,9 @@ "llvm", "mlir", "polly", + "libunwind", + "libcxx", + "libcxxabi", ], ), }, From 7c3f8a3ee2750795f2a1c0b81cb91cb68ab8d423 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 25 Aug 2025 09:43:38 -0700 Subject: [PATCH 103/135] Switch sanitizer-windows bot over to runtimes build (#570) This enables us to catch issues like https://g-issues.chromium.org/issues/437182411#comment20 at postcommit testing time rather than when trying to integrate into downstream projects. 
--- buildbot/osuosl/master/config/builders.py | 15 ++++- .../builders/annotated/sanitizer-windows.py | 64 ------------------- 2 files changed, 13 insertions(+), 66 deletions(-) delete mode 100644 zorg/buildbot/builders/annotated/sanitizer-windows.py diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 576461baa..87b9a72fa 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -1829,9 +1829,20 @@ 'tags' : ["sanitizer"], 'workernames' : ["sanitizer-windows"], 'builddir': "sanitizer-windows", - 'factory' : AnnotatedBuilder.getAnnotatedBuildFactory( - script="sanitizer-windows.py", + 'factory': UnifiedTreeBuilder.getCmakeWithNinjaBuildFactory( + clean=True, depends_on_projects=["llvm", "clang", "lld", "compiler-rt"], + # TODO(boomanaiden154): We should probably be using sccache here. + extra_configure_args=[ + "-DCMAKE_BUILD_TYPE=Release", + "-DLLVM_ENABLE_ASSERTIONS=ON", + "-DLLVM_ENABLE_PDB=ON", + "-DLLVM_TARGETS_TO_BUILD=X86", + "-DCOMPILER_RT_BUILD_BUILTINS=ON", + "-DCOMPILER_RT_BUILD_ORC=OFF", + + ], + checks=["check-compiler-rt"], # FIXME: Restore `timeout` to default when fixed https://github.com/llvm/llvm-project/issues/102513 timeout=2400)}, diff --git a/zorg/buildbot/builders/annotated/sanitizer-windows.py b/zorg/buildbot/builders/annotated/sanitizer-windows.py deleted file mode 100644 index d654e5b8f..000000000 --- a/zorg/buildbot/builders/annotated/sanitizer-windows.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/python - -import os -import sys -import annotated_builder -import util - - -class SanitizerAnnotatedBuilder(annotated_builder.AnnotatedBuilder): - - """Customizes the 'build' step of the generic AnnotatedBuilder""" - - def build(self, stage_name, build_dir, jobs=None): - # The basic idea here is to run 'ninja compiler-rt ; ninja clang lld'. - # This ensures that portability issues in compiler-rt code are found - # first. 
Then, we only build clang and lld, the primary dependencies of - # the sanitizer test suites, to keep cycle time low. This means there - # are still some remaining test dependencies (FileCheck) that may be - # compiled during the check step, but there shouldn't be that many. - self.report_build_step('%s build' % (stage_name,)) - self.halt_on_failure() - base_cmd = ['ninja'] - if jobs: - base_cmd += ['-j', str(jobs)] - early_targets = ['compiler-rt'] - late_targets = ['clang', 'lld'] - util.report_run_cmd(base_cmd + early_targets, cwd=build_dir) - util.report_run_cmd(base_cmd + late_targets, cwd=build_dir) - - -def main(argv): - ap = annotated_builder.get_argument_parser() - args = ap.parse_args(argv[1:]) - - projects = ['llvm', 'clang', 'lld', 'compiler-rt'] - stages = 1 - extra_cmake_args = [ - '-DCMAKE_BUILD_TYPE=Release', - '-DLLVM_ENABLE_PDB=ON', - '-DLLVM_ENABLE_ASSERTIONS=ON', - '-DLLVM_TARGETS_TO_BUILD=X86', - ] - check_targets = ['check-asan', 'check-asan-dynamic', 'check-sanitizer', - 'check-ubsan', 'check-fuzzer', 'check-cfi', - 'check-profile', 'check-builtins'] - - # These arguments are a bit misleading, they really mean use cl.exe for - # stage1 instead of GCC. - compiler = 'clang-cl' - linker = 'lld-link' - - builder = SanitizerAnnotatedBuilder() - builder.run_steps(stages=stages, - projects=projects, - extra_cmake_args=extra_cmake_args, - check_targets=check_targets, - compiler=compiler, - linker=linker, - jobs=args.jobs) - - -if __name__ == '__main__': - sys.path.append(os.path.dirname(__file__)) - sys.exit(main(sys.argv)) From b0b800d87fb5b8a7bb82fbc6abd7bd61767dc494 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 25 Aug 2025 20:12:07 +0000 Subject: [PATCH 104/135] Explicitly specify compiler/linker for sanitizer-windows Otherwise the bot fails to pick up the compiler. 
--- buildbot/osuosl/master/config/builders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 87b9a72fa..ca3731236 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -1840,7 +1840,9 @@ "-DLLVM_TARGETS_TO_BUILD=X86", "-DCOMPILER_RT_BUILD_BUILTINS=ON", "-DCOMPILER_RT_BUILD_ORC=OFF", - + "-DCMAKE_C_COMPILER=cl", + "-DCMAKE_CXX_COMPILER=cl", + "-DCMAKE_LINKER=lld-link", ], checks=["check-compiler-rt"], # FIXME: Restore `timeout` to default when fixed https://github.com/llvm/llvm-project/issues/102513 From 675e045b7836ad8f17734a44668d0e536b6d552e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 25 Aug 2025 20:44:20 +0000 Subject: [PATCH 105/135] Autodetect VS install on sanitizer-windows --- buildbot/osuosl/master/config/builders.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index ca3731236..bddb032f0 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -1829,7 +1829,8 @@ 'tags' : ["sanitizer"], 'workernames' : ["sanitizer-windows"], 'builddir': "sanitizer-windows", - 'factory': UnifiedTreeBuilder.getCmakeWithNinjaBuildFactory( + 'factory': UnifiedTreeBuilder.getCmakeWithNinjaWithMSVCBuildFactory( + vs="autodetect", clean=True, depends_on_projects=["llvm", "clang", "lld", "compiler-rt"], # TODO(boomanaiden154): We should probably be using sccache here. 
@@ -1840,9 +1841,6 @@ "-DLLVM_TARGETS_TO_BUILD=X86", "-DCOMPILER_RT_BUILD_BUILTINS=ON", "-DCOMPILER_RT_BUILD_ORC=OFF", - "-DCMAKE_C_COMPILER=cl", - "-DCMAKE_CXX_COMPILER=cl", - "-DCMAKE_LINKER=lld-link", ], checks=["check-compiler-rt"], # FIXME: Restore `timeout` to default when fixed https://github.com/llvm/llvm-project/issues/102513 From fb16a3d0dc0572a586b0711626c38ee42af6fae9 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 26 Aug 2025 20:14:08 +0000 Subject: [PATCH 106/135] Set target_arch to x64 for sanitizer-windows So we do not pick up the default 32-bit toolchain. --- buildbot/osuosl/master/config/builders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index bddb032f0..d8efec9ae 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -1833,6 +1833,7 @@ vs="autodetect", clean=True, depends_on_projects=["llvm", "clang", "lld", "compiler-rt"], + target_arch='x64', # TODO(boomanaiden154): We should probably be using sccache here. extra_configure_args=[ "-DCMAKE_BUILD_TYPE=Release", From f68bf8c65b7b02130f230a94a340e612936c9795 Mon Sep 17 00:00:00 2001 From: cmtice Date: Wed, 27 Aug 2025 10:28:08 -0700 Subject: [PATCH 107/135] [llvm-zorg][CI] Minor fix to cluster management documentation. (#579) Clarify documentation on updating ARC version. --- premerge/cluster-management.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/premerge/cluster-management.md b/premerge/cluster-management.md index 2764b4577..e9ba4cc02 100644 --- a/premerge/cluster-management.md +++ b/premerge/cluster-management.md @@ -219,9 +219,9 @@ terraform destroy -target module.premerge_cluster_us_central_resources.kubernete ### Bumping the Version Number This is necessary only for bumping the version of ARC. This involves simply -updating the version field for the `premerge_resources` objects in `main.tf`. 
-Each premerge cluster (`llvm-premerge-cluster-us-central` and -`llvm-premerge-cluster-us-west`) has a separate version. This allows for +updating the `github_arc_version` field for premerge cluster resources in +`premerge/main.tf`. Each premerge cluster (`llvm-premerge-cluster-us-central` +and `llvm-premerge-cluster-us-west`) has a separate version. This allows for updating them separately which allows for zero-downtime upgrades when the system is operating at low capacity. Make sure to commit the changes and push them to `llvm-zorg` to ensure others working on the terraform configuration From ce54ff35059ce7ba914d04f87b8e3f75bd8bdb9e Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Wed, 27 Aug 2025 14:24:09 -0700 Subject: [PATCH 108/135] Enable BOLT project on Aarch64/Linux Windows host cross builder. (#577) The builder builds host based BOLT tools only, no Aarch64 bolt runtime yet (not supported for the cross toolchain builds). --- buildbot/osuosl/master/config/builders.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index d8efec9ae..722e2e28b 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -269,7 +269,7 @@ }, {'name' : "llvm-clang-win-x-aarch64", - 'tags' : ["clang", "llvm", "lld", "clang-tools-extra", "compiler-rt", "libc++", "libc++abi", "libunwind", "cross", "aarch64"], + 'tags' : ["clang", "llvm", "lld", "clang-tools-extra", "compiler-rt", "libc++", "libc++abi", "libunwind", "bolt", "cross", "aarch64"], 'workernames' : ["as-builder-2"], 'builddir': "x-aarch64", 'factory' : UnifiedTreeBuilder.getCmakeExBuildFactory( @@ -282,6 +282,7 @@ 'libcxx', 'libcxxabi', 'lld', + 'bolt', ], vs = "autodetect", clean = True, @@ -289,6 +290,7 @@ "check-llvm", "check-clang", "check-lld", + "check-bolt", "check-compiler-rt-aarch64-unknown-linux-gnu", "check-unwind-aarch64-unknown-linux-gnu", 
"check-cxxabi-aarch64-unknown-linux-gnu", @@ -297,7 +299,8 @@ cmake_definitions = { "LLVM_TARGETS_TO_BUILD" : "AArch64", "LLVM_INCLUDE_BENCHMARKS" : "OFF", - "LLVM_LIT_ARGS" : "-v -vv --threads=32 --time-tests", + # Allow long timeout for BOLT's dump-dot-func.test test. + "LLVM_LIT_ARGS" : "-v -vv --threads=32 --time-tests --timeout=300", "TOOLCHAIN_TARGET_TRIPLE" : "aarch64-unknown-linux-gnu", "TOOLCHAIN_TARGET_SYSROOTFS" : util.Interpolate("%(prop:sysroot_path_agx)s"), "REMOTE_TEST_HOST" : util.Interpolate("%(prop:remote_host_agx)s"), @@ -306,6 +309,7 @@ "CMAKE_CXX_FLAGS" : "-D__OPTIMIZE__", "CMAKE_C_COMPILER_LAUNCHER" : "ccache", "CMAKE_CXX_COMPILER_LAUNCHER" : "ccache", + "BOLT_TARGETS_TO_BUILD" : "AArch64", }, cmake_options = [ "-C", util.Interpolate("%(prop:srcdir_relative)s/clang/cmake/caches/CrossWinToARMLinux.cmake"), From 47fe254f50dc171cd7ab1c2ba23bc90a48e4d1ee Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Thu, 28 Aug 2025 16:55:33 +0200 Subject: [PATCH 109/135] [AMDGPU][Libc] Add cmake-cache based builder (#581) Move the buildbot that used to do the libc builds to the CMake cache file based approach. This is a bit of a WIP as we also sort out a few other things in the background. --- buildbot/osuosl/master/config/builders.py | 37 ++++------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 722e2e28b..92e238d3f 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -2087,37 +2087,12 @@ 'workernames' : ["omp-vega20-1"], # We would like to never collapse, but it seems the load is too high on that system to keep up. 
'builddir': "openmp-offload-libc-amdgpu-runtime", - 'factory' : OpenMPBuilder.getOpenMPCMakeBuildFactory( - clean=True, - depends_on_projects=['llvm', 'clang', 'compiler-rt', 'libc', 'lld', 'offload', 'openmp'], - # Special case this bot to account for new (verbose) libc build syntax - enable_runtimes=['openmp', 'compiler-rt', 'offload'], - extraCmakeArgs=[ - "-DCMAKE_BUILD_TYPE=Release", - "-DCLANG_DEFAULT_LINKER=lld", - "-DLLVM_TARGETS_TO_BUILD=X86;AMDGPU", - "-DLLVM_ENABLE_ASSERTIONS=ON", - "-DCMAKE_C_COMPILER_LAUNCHER=ccache", - "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", - "-DLIBOMPTARGET_FOUND_AMDGPU_GPU=ON", - "-DLIBOMP_ARCHER_SUPPORT=OFF", - "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=libc", - "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa", - "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_TEST_ARCHITECTURE=gfx906", - ], - env={ - 'HSA_ENABLE_SDMA':'0', - }, - install=True, - testsuite=False, - testsuite_sollvevv=False, - extraTestsuiteCmakeArgs=[ - "-DTEST_SUITE_SOLLVEVV_OFFLOADING_CFLAGS=-fopenmp;-fopenmp-targets=amdgcn-amd-amdhsa;-Xopenmp-target=amdgcn-amd-amdhsa;-march=gfx906", - "-DTEST_SUITE_SOLLVEVV_OFFLOADING_LDLAGS=-fopenmp;-fopenmp-targets=amdgcn-amd-amdhsa;-Xopenmp-target=amdgcn-amd-amdhsa;-march=gfx906", - ], - add_lit_checks=["check-offload", "check-libc-amdgcn-amd-amdhsa"], - add_openmp_lit_args=["--filter-out=offloading/pgo1.c"], - )}, + 'factory' : AnnotatedBuilder.getAnnotatedBuildFactory( + depends_on_projects=['llvm', 'clang', 'compiler-rt', 'lld', 'libc', 'offload', 'openmp', 'libunwind'], + script='amdgpu-offload-cmake.py --cmake-file=AMDGPULibcBot.cmake', + checkout_llvm_sources=True, + script_interpreter=None + )}, {'name' : "openmp-offload-amdgpu-clang-flang", 'tags' : ["openmp,flang"], From d6f7e7575b7522f3378fca14be636567f9f43375 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 28 Aug 2025 08:59:20 -0700 Subject: [PATCH 110/135] [CI] Upload lit test timing files (#580) This patch makes it so that the postcommit builders 
upload the lit timing files to the GCS buckets. Once the caches get populated we can add support to the premerge builders for pulling this in. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 2310a5058..d945ba94b 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -110,6 +110,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"', + "python .ci/cache_lit_timing_files.py upload", "echo BUILD FINISHED", ] start_build( @@ -133,6 +134,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-tools check-lld check-llvm check-mlir check-polly"', + "python .ci/cache_lit_timing_files.py upload", "echo BUILD FINISHED", ] bash_command = f"bash -c \"{';'.join(bash_commands)}\"\"" From ac13274bc0bccc9153f36fd059b22ce627659d66 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 28 Aug 2025 17:55:03 +0000 Subject: [PATCH 111/135] [CI] Also set CACHE_GCS_BUCKET in premerge buildbot containers Otherwise the cache_lit_timing_files.py script will not work. 
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index d945ba94b..e263bc5f5 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -106,6 +106,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: "export CXX=clang++", "export POSTCOMMIT_CI=1", f"export SCCACHE_GCS_BUCKET={bucket_name}", + f"export CACHE_GCS_BUCKET={bucket_name}", "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", @@ -130,6 +131,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): f"git checkout {commit_sha}", "export POSTCOMMIT_CI=1", f"export SCCACHE_GCS_BUCKET={bucket_name}", + f"export CACHE_GCS_BUCKET={bucket_name}", "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", From 483e0ee6e77d9eded2e2885b9319f8cfca7651dd Mon Sep 17 00:00:00 2001 From: Omair Javaid Date: Fri, 29 Aug 2025 13:55:32 +0500 Subject: [PATCH 112/135] Fix ClangBuilder for running LNT on Windows (#575) This PR adds support for running the LLVM test-suite on Windows buildbots using ClangBuilder. The changes ensure that the correct executable paths and file extensions are used when running Python and LNT scripts on Windows. This depends on llvm/llvm-project#155226 which adds llvm-lit.cmd wrapper to make llvm-lit directly runnable on Windows. 
--- zorg/buildbot/builders/ClangBuilder.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/zorg/buildbot/builders/ClangBuilder.py b/zorg/buildbot/builders/ClangBuilder.py index ff5d91bf7..87f4591bd 100644 --- a/zorg/buildbot/builders/ClangBuilder.py +++ b/zorg/buildbot/builders/ClangBuilder.py @@ -533,17 +533,18 @@ def _getClangCMakeBuildFactory( env=env)) # Get generated python, lnt - python = util.Interpolate('%(prop:builddir)s/test/sandbox/bin/python') - lnt = util.Interpolate('%(prop:builddir)s/test/sandbox/bin/lnt') - lnt_setup = util.Interpolate('%(prop:builddir)s/test/lnt/setup.py') + python = InterpolateToPosixPath('%(prop:builddir)s/test/sandbox/Scripts/python') + lnt_ext = '.exe' if vs else '' + lnt = InterpolateToPosixPath(f'%(prop:builddir)s/test/sandbox/Scripts/lnt{lnt_ext}') + lnt_setup = InterpolateToPosixPath('%(prop:builddir)s/test/lnt/setup.py') # Paths - sandbox = util.Interpolate('%(prop:builddir)s/test/sandbox') - test_suite_dir = util.Interpolate('%(prop:builddir)s/test/test-suite') + sandbox = InterpolateToPosixPath('%(prop:builddir)s/test/sandbox') + test_suite_dir = InterpolateToPosixPath('%(prop:builddir)s/test/test-suite') # Get latest built Clang (stage1 or stage2) - cc = util.Interpolate(f'%(prop:builddir)s/{compiler_path}/bin/{cc}') - cxx = util.Interpolate(f'%(prop:builddir)s/{compiler_path}/bin/{cxx}') + cc = InterpolateToPosixPath(f'%(prop:builddir)s/{compiler_path}/bin/{cc}') + cxx = InterpolateToPosixPath(f'%(prop:builddir)s/{compiler_path}/bin/{cxx}') # LNT Command line (don't pass -jN. 
Users need to pass both --threads # and --build-threads in nt_flags/test_suite_flags to get the same effect) @@ -558,7 +559,7 @@ def _getClangCMakeBuildFactory( # Append any option provided by the user test_suite_cmd.extend(nt_flags) else: - lit = util.Interpolate(f'%(prop:builddir)s/{stage1_build}/bin/llvm-lit') + lit = InterpolateToPosixPath(f'%(prop:builddir)s/{stage1_build}/bin/llvm-lit') test_suite_cmd = [python, lnt, 'runtest', 'test-suite', '--no-timestamp', '--sandbox', sandbox, @@ -567,7 +568,7 @@ def _getClangCMakeBuildFactory( '--cxx', cxx, '--use-lit', lit, # Carry on building even if there is a failure. - '--build-tool-options', '"-k"'] + '--build-tool-options', '"-k 0"' if '--use-make=ninja' in testsuite_flags else '"-k"'] # Enable fortran if flang is checked out if checkout_flang: fortran_flags = [ From 793d3c576be24c04030946009462225c851f2712 Mon Sep 17 00:00:00 2001 From: Omair Javaid Date: Fri, 29 Aug 2025 14:02:32 +0500 Subject: [PATCH 113/135] Add flang-arm64-windows-msvc-testsuite builder (#576) This PR adds a new buildbot to run LLVM testsuite on windows. It adds a new builder and worker pair. The testsuite will run after the first stage build skipping the ninja check step as we cover that on other buildbots. This will primarily be the builder to test that ensures LNT and LLVM testsuite are running without regressions on windows. We will later build on it to add support for test more as our Windows hardware availability improves. This depends on #575 and llvm/llvm-project#155226. 
--- buildbot/osuosl/master/config/builders.py | 23 +++++++++++++++++++++++ buildbot/osuosl/master/config/workers.py | 1 + 2 files changed, 24 insertions(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 92e238d3f..a89cb4662 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -2631,6 +2631,29 @@ "-DCOMPILER_RT_BUILD_SANITIZERS=OFF", "-DLLVM_CCACHE_BUILD=ON"])}, + {'name' : "flang-arm64-windows-msvc-testsuite", + 'tags' : ["flang"], + 'workernames' : ["linaro-armv8-windows-msvc-06"], + 'builddir': "flang-arm64-win-msvc-ts", + 'factory' : ClangBuilder.getClangCMakeBuildFactory( + vs="manual", + clean=False, + checkout_flang=True, + checkout_lld=True, + runTestSuite=True, + testStage1=False, + testsuite_flags=[ + '--cmake-define', "TEST_SUITE_SUBDIRS='Fortran'", + '--use-make=ninja', + '--threads=8', + '--build-threads=8'], + extra_cmake_args=[ + "-DLLVM_TARGETS_TO_BUILD=AArch64", + "-DCLANG_DEFAULT_LINKER=lld", + "-DCMAKE_TRY_COMPILE_CONFIGURATION=Release", + "-DCOMPILER_RT_BUILD_SANITIZERS=OFF", + "-DLLVM_CCACHE_BUILD=ON"])}, + {'name' : 'ppc64-flang-aix', 'tags' : ["flang", "ppc", "ppc64", "aix"], 'workernames' : ['ppc64-flang-aix-test'], diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 9775348d6..09f7a7a52 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -51,6 +51,7 @@ def get_all(): create_worker("linaro-armv8-windows-msvc-03", max_builds=1), create_worker("linaro-armv8-windows-msvc-04", max_builds=1), create_worker("linaro-armv8-windows-msvc-05", max_builds=1), + create_worker("linaro-armv8-windows-msvc-06", max_builds=1), # Linux s390x Ubuntu Focal, IBM z13 (5GHz), 64GB of RAM create_worker("onnx-mlir-nowarn-linux-s390x", properties={'jobs' : 4}, max_builds=1), From cd116057d3a83349108b70b1fc3bf691502b8232 Mon Sep 17 00:00:00 2001 From: Aiden 
Grossman Date: Fri, 29 Aug 2025 10:58:40 -0700 Subject: [PATCH 114/135] [CI] Add flang-rt to postcommit testing (#583) This patch ensures we test flang-rt postcommit so we are actually testing the configuration introduced by #156039. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index e263bc5f5..71f4dabf1 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -110,7 +110,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"', + './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;flang-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-flang-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"', "python .ci/cache_lit_timing_files.py upload", "echo BUILD FINISHED", ] From 890c29fe970ac6e4edff01387ce8d8b9046b5a25 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 29 Aug 2025 23:26:20 +0000 Subject: [PATCH 115/135] [CI] Add compiler-rt to windows postcommit https://github.com/llvm/llvm-project/pull/155186 enabled this on the premerge side, and we want corresponding postcommit coverage. 
--- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 71f4dabf1..4316230ca 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -135,7 +135,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-tools check-lld check-llvm check-mlir check-polly"', + '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-tools check-lld check-llvm check-mlir check-polly" "compiler-rt" "check-compiler-rt"', "python .ci/cache_lit_timing_files.py upload", "echo BUILD FINISHED", ] From 73300d4f7f5754fb79f56938cc70390aa7e65adc Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Mon, 1 Sep 2025 10:18:36 +0100 Subject: [PATCH 116/135] Insert debugify-report into the test suite build directory (#582) As part of the DebugifyBuilder, we add cflag arguments to the test suite build step containing the path of a report file, where any bugs detected by debugify will be written. Debugify appends to the file rather than replacing it, as we wish to accumulate the bugs found in all invocations of Clang during the test suite build step to a single file. Currently however, this file is not cleaned up after builds, meaning that we are constantly appending bugs to the same file after each test run, rather than creating a fresh file - this contaminates the results for future builds, as bugs will never disappear from the list even if they no longer exist in LLVM. 
This patch moves the report file into the test suite build directory, which will always be cleaned before each test run, preventing the results of one run from seeping into subsequent runs. --- zorg/buildbot/builders/DebugifyBuilder.py | 5 ++++- zorg/buildbot/builders/TestSuiteBuilder.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/zorg/buildbot/builders/DebugifyBuilder.py b/zorg/buildbot/builders/DebugifyBuilder.py index 439752256..0e838ab6e 100644 --- a/zorg/buildbot/builders/DebugifyBuilder.py +++ b/zorg/buildbot/builders/DebugifyBuilder.py @@ -1,6 +1,7 @@ from buildbot.plugins import util from buildbot.steps.shell import ShellCommand from zorg.buildbot.builders import TestSuiteBuilder +from zorg.buildbot.builders.TestSuiteBuilder import test_suite_build_path from zorg.buildbot.commands.CmakeCommand import CmakeCommand @@ -53,7 +54,9 @@ def getDebugifyBuildFactory( ]) # This path will be passed through to util.Interpolate, so we leave it in this format. - debugify_output_path = f"%(prop:builddir)s/debugify-report.json" + # NB: This must be stored in the test suite build directory, as that is the only way to ensure that it is + # unconditionally up before (and not after) each run. + debugify_output_path = f"%(prop:builddir)s/{test_suite_build_path}/debugify-report.json" # Make a local copy of the test suite configure args, as we are going to modify that. if extra_test_suite_configure_args is not None: diff --git a/zorg/buildbot/builders/TestSuiteBuilder.py b/zorg/buildbot/builders/TestSuiteBuilder.py index 719315b48..86753bd43 100644 --- a/zorg/buildbot/builders/TestSuiteBuilder.py +++ b/zorg/buildbot/builders/TestSuiteBuilder.py @@ -8,6 +8,9 @@ from zorg.buildbot.commands.NinjaCommand import NinjaCommand from zorg.buildbot.commands.LitTestCommand import LitTestCommand +# The DebugifyBuilder needs to know the test-suite build directory, so we share the build directory via this variable. 
+test_suite_build_path = 'test/build-test-suite' + # This builder is uses UnifiedTreeBuilders and adds running # llvm-test-suite with cmake and ninja step. @@ -30,7 +33,7 @@ def addTestSuiteStep( lit = util.Interpolate('%(prop:builddir)s/' + compiler_dir + '/bin/llvm-lit') test_suite_base_dir = util.Interpolate('%(prop:builddir)s/' + 'test') test_suite_src_dir = util.Interpolate('%(prop:builddir)s/' + 'test/test-suite') - test_suite_workdir = util.Interpolate('%(prop:builddir)s/' + 'test/build-test-suite') + test_suite_workdir = util.Interpolate('%(prop:builddir)s/' + test_suite_build_path) cmake_lit_arg = util.Interpolate('-DTEST_SUITE_LIT:FILEPATH=%(prop:builddir)s/' + compiler_dir + '/bin/llvm-lit') # used for cmake building test-suite step if extra_configure_args is not None: From 35be7cfc054a32c0a31db0594c695a139451fa51 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 1 Sep 2025 18:19:08 +0200 Subject: [PATCH 117/135] Drop unnecessary -treat-scalable-fixed-error-as-warning=false options (#584) This is already the default, and I'm dropping this option in https://github.com/llvm/llvm-project/pull/156336. --- buildbot/osuosl/master/config/builders.py | 42 ++++++++++------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index a89cb4662..6b140f924 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -476,11 +476,7 @@ # https://github.com/aws/aws-graviton-getting-started/blob/main/c-c++.md. # AArch64 Clang+LLVM+RT+LLD check-all + flang + test-suite + - # mlir-integration-tests w/SVE-Vector-Length-Agnostic Note that in this and - # other clang-aarch64-sve-* builders we set -mllvm - # -treat-scalable-fixed-error-as-warning=false to make compiler fail on - # non-critical SVE codegen issues. This helps us notice and fix SVE - # problems sooner rather than later. 
+ # mlir-integration-tests w/SVE-Vector-Length-Agnostic {'name' : "clang-aarch64-sve-vla", 'tags' : ["clang"], 'workernames' : ["linaro-g3-01", "linaro-g3-02", "linaro-g3-03", "linaro-g3-04"], @@ -493,8 +489,8 @@ 'NO_STOP_MESSAGE':'1', # For Fortran test-suite }, testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', - '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', + '--cppflags', '-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -O3', + '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -O3', '--threads=32', '--build-threads=32'], extra_cmake_args=[ "-DCMAKE_C_FLAGS='-mcpu=neoverse-512tvb'", @@ -518,12 +514,12 @@ 'NO_STOP_MESSAGE':'1', # For Fortran test-suite }, testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', - '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', + '--cppflags', '-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -O3', + '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -O3', '--threads=32', '--build-threads=32'], extra_cmake_args=[ - "-DCMAKE_C_FLAGS='-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false'", - "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false'", + "-DCMAKE_C_FLAGS='-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred'", + "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-512tvb -mllvm -scalable-vectorization=preferred'", 
"-DLLVM_ENABLE_LLD=True", "-DMLIR_INCLUDE_INTEGRATION_TESTS=True", "-DMLIR_RUN_ARM_SVE_TESTS=True"])}, @@ -541,8 +537,8 @@ 'NO_STOP_MESSAGE':'1', # For Fortran test-suite }, testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false -O3', - '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false -O3', + '--cppflags', '-mcpu=neoverse-512tvb -msve-vector-bits=256 -O3', + '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -msve-vector-bits=256 -O3', '--threads=32', '--build-threads=32'], extra_cmake_args=[ "-DCMAKE_C_FLAGS='-mcpu=neoverse-512tvb'", @@ -566,12 +562,12 @@ 'NO_STOP_MESSAGE':'1', # For Fortran test-suite }, testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false -O3', - '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false -O3', + '--cppflags', '-mcpu=neoverse-512tvb -msve-vector-bits=256 -O3', + '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-512tvb -msve-vector-bits=256 -O3', '--threads=32', '--build-threads=32'], extra_cmake_args=[ - "-DCMAKE_C_FLAGS='-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false'", - "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-512tvb -msve-vector-bits=256 -mllvm -treat-scalable-fixed-error-as-warning=false'", + "-DCMAKE_C_FLAGS='-mcpu=neoverse-512tvb -msve-vector-bits=256'", + "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-512tvb -msve-vector-bits=256'", "-DLLVM_ENABLE_LLD=True", "-DMLIR_INCLUDE_INTEGRATION_TESTS=True", "-DMLIR_RUN_ARM_SVE_TESTS=True"])}, @@ -592,8 +588,8 @@ 'NO_STOP_MESSAGE':'1', # For Fortran test-suite }, testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', - 
'--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', + '--cppflags', '-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -O3', + '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -O3', '--threads=48', '--build-threads=48'], extra_cmake_args=[ "-DCMAKE_C_FLAGS='-mcpu=neoverse-v2'", @@ -618,12 +614,12 @@ 'NO_STOP_MESSAGE':'1', # For Fortran test-suite }, testsuite_flags=[ - '--cppflags', '-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', - '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false -O3', + '--cppflags', '-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -O3', + '--cmake-define', 'CMAKE_Fortran_FLAGS=-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -O3', '--threads=48', '--build-threads=48'], extra_cmake_args=[ - "-DCMAKE_C_FLAGS='-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false'", - "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred -mllvm -treat-scalable-fixed-error-as-warning=false'", + "-DCMAKE_C_FLAGS='-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred'", + "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-v2 -mllvm -scalable-vectorization=preferred'", "-DLLVM_ENABLE_LLD=True", "-DMLIR_INCLUDE_INTEGRATION_TESTS=True", "-DMLIR_RUN_ARM_SVE_TESTS=True"])}, From bd4188881ae796899c10b34e8473da0185df5c93 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 2 Sep 2025 09:50:43 +0100 Subject: [PATCH 118/135] [ClangBuilder] Correct path of testsuite virtualenv's Python (#586) Fixes 60d2141a5073ce1330bc45e4cc827344af67894b. On Windows, it's Scripts/python, on Linux it's bin/python. Unfortunately there is no third common path to use. 
Even the virtualenv docs have different paths for Windows and everything else: https://virtualenv.pypa.io/en/latest/user_guide.html#quick-start --- zorg/buildbot/builders/ClangBuilder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/ClangBuilder.py b/zorg/buildbot/builders/ClangBuilder.py index 87f4591bd..4c58abb64 100644 --- a/zorg/buildbot/builders/ClangBuilder.py +++ b/zorg/buildbot/builders/ClangBuilder.py @@ -533,7 +533,8 @@ def _getClangCMakeBuildFactory( env=env)) # Get generated python, lnt - python = InterpolateToPosixPath('%(prop:builddir)s/test/sandbox/Scripts/python') + virtualenv_dir = 'Scripts' if vs else 'bin' + python = InterpolateToPosixPath(f'%(prop:builddir)s/test/sandbox/{virtualenv_dir}/python') lnt_ext = '.exe' if vs else '' lnt = InterpolateToPosixPath(f'%(prop:builddir)s/test/sandbox/Scripts/lnt{lnt_ext}') lnt_setup = InterpolateToPosixPath('%(prop:builddir)s/test/lnt/setup.py') From 3d0b8202f32e6e3f31e06fea60a7ae9291be0166 Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Tue, 2 Sep 2025 11:09:14 -0700 Subject: [PATCH 119/135] Revert "Update the project dependencies for the lldb-remote-linux builders." (#588) Reverts llvm/llvm-zorg#574 Causes failures during the configuration step for the lldb-remote-linux-* builders. 
--- buildbot/osuosl/master/config/builders.py | 36 +++++------------------ 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 6b140f924..696c5b2df 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3657,20 +3657,7 @@ 'workernames': ["as-builder-9"], 'builddir': "lldb-remote-linux-ubuntu", 'factory': UnifiedTreeBuilder.getCmakeExBuildFactory( - depends_on_projects = [ - 'llvm', - 'compiler-rt', - 'clang', - 'libunwind', - 'libcxx', - 'libcxxabi', - 'lld', - 'lldb', - ], - # Allow only these projects with LLVM_ENABLE_PROJECTS. - enable_projects = ["llvm", "clang", "lld", "lldb"], - # Use a proper list of runtimes (LLVM_ENABLE_RUNTIMES) from CrossWinToARMLinux.cmake. - # Avoid making it from a list of the depended projects. + depends_on_projects = ["llvm", "clang", "lld", "lldb"], enable_runtimes = None, checks = [ "check-lldb-unit", @@ -3708,6 +3695,9 @@ "LLDB_ENABLE_CURSES" : "OFF", "LLDB_ENABLE_LZMA" : "OFF", "LLDB_ENABLE_LIBXML2" : "OFF", + # No need to build lldb-server during the first stage. + # We are going to build it for the target platform later. + "LLDB_CAN_USE_LLDB_SERVER" : "OFF", "LLDB_TEST_USER_ARGS" : util.Interpolate( "--env;ARCH_CFLAGS=-mcpu=cortex-a78;" \ "--platform-name;remote-linux;" \ @@ -3796,20 +3786,7 @@ 'workernames': ["as-builder-10"], 'builddir': "lldb-x-aarch64", 'factory': UnifiedTreeBuilder.getCmakeExBuildFactory( - depends_on_projects = [ - 'llvm', - 'compiler-rt', - 'clang', - 'libunwind', - 'libcxx', - 'libcxxabi', - 'lld', - 'lldb', - ], - # Allow only these projects with LLVM_ENABLE_PROJECTS. - enable_projects = ["llvm", "clang", "lld", "lldb"], - # Use a proper list of runtimes (LLVM_ENABLE_RUNTIMES) from CrossWinToARMLinux.cmake. - # Avoid making it from a list of the depended projects. 
+ depends_on_projects = ["llvm", "clang", "lld", "lldb"], enable_runtimes = None, checks = [ "check-lldb-unit", @@ -3846,6 +3823,9 @@ "LLDB_ENABLE_CURSES" : "OFF", "LLDB_ENABLE_LZMA" : "OFF", "LLDB_ENABLE_LIBXML2" : "OFF", + # No need to build lldb-server during the first stage. + # We are going to build it for the target platform later. + "LLDB_CAN_USE_LLDB_SERVER" : "OFF", "LLDB_TEST_USER_ARGS" : util.Interpolate( "--env;ARCH_CFLAGS=-mcpu=cortex-a78;" \ "--platform-name;remote-linux;" \ From edc1dd08f7912d3cbeaf0bda4a9c1c4878c84f7b Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Tue, 2 Sep 2025 11:18:35 -0700 Subject: [PATCH 120/135] Revert "Enable BOLT project on Aarch64/Linux Windows host cross builder." (#589) Reverts llvm/llvm-zorg#577 Noticed unstable builder behavior during the BOLT tests. Requires investigation and update https://lab.llvm.org/buildbot/#/builders/193/builds/10263 --- buildbot/osuosl/master/config/builders.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 696c5b2df..8a173c1be 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -269,7 +269,7 @@ }, {'name' : "llvm-clang-win-x-aarch64", - 'tags' : ["clang", "llvm", "lld", "clang-tools-extra", "compiler-rt", "libc++", "libc++abi", "libunwind", "bolt", "cross", "aarch64"], + 'tags' : ["clang", "llvm", "lld", "clang-tools-extra", "compiler-rt", "libc++", "libc++abi", "libunwind", "cross", "aarch64"], 'workernames' : ["as-builder-2"], 'builddir': "x-aarch64", 'factory' : UnifiedTreeBuilder.getCmakeExBuildFactory( @@ -282,7 +282,6 @@ 'libcxx', 'libcxxabi', 'lld', - 'bolt', ], vs = "autodetect", clean = True, @@ -290,7 +289,6 @@ "check-llvm", "check-clang", "check-lld", - "check-bolt", "check-compiler-rt-aarch64-unknown-linux-gnu", "check-unwind-aarch64-unknown-linux-gnu", "check-cxxabi-aarch64-unknown-linux-gnu", @@ 
-299,8 +297,7 @@ cmake_definitions = { "LLVM_TARGETS_TO_BUILD" : "AArch64", "LLVM_INCLUDE_BENCHMARKS" : "OFF", - # Allow long timeout for BOLT's dump-dot-func.test test. - "LLVM_LIT_ARGS" : "-v -vv --threads=32 --time-tests --timeout=300", + "LLVM_LIT_ARGS" : "-v -vv --threads=32 --time-tests", "TOOLCHAIN_TARGET_TRIPLE" : "aarch64-unknown-linux-gnu", "TOOLCHAIN_TARGET_SYSROOTFS" : util.Interpolate("%(prop:sysroot_path_agx)s"), "REMOTE_TEST_HOST" : util.Interpolate("%(prop:remote_host_agx)s"), @@ -309,7 +306,6 @@ "CMAKE_CXX_FLAGS" : "-D__OPTIMIZE__", "CMAKE_C_COMPILER_LAUNCHER" : "ccache", "CMAKE_CXX_COMPILER_LAUNCHER" : "ccache", - "BOLT_TARGETS_TO_BUILD" : "AArch64", }, cmake_options = [ "-C", util.Interpolate("%(prop:srcdir_relative)s/clang/cmake/caches/CrossWinToARMLinux.cmake"), From 32781ae7bc0cef1228fea3c197564634631ef8d6 Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Wed, 3 Sep 2025 14:02:47 +0200 Subject: [PATCH 121/135] [AMDGPU][Libc] Fix argument passing to script (#590) Having the argument as part of the script name is an error as it tries to find / exec that exact string. Pass theargument as extra_args instead. Not sure if we need to split argument name and value in separate array entries. 
--- buildbot/osuosl/master/config/builders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 8a173c1be..9eb7630f3 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -2081,7 +2081,8 @@ 'builddir': "openmp-offload-libc-amdgpu-runtime", 'factory' : AnnotatedBuilder.getAnnotatedBuildFactory( depends_on_projects=['llvm', 'clang', 'compiler-rt', 'lld', 'libc', 'offload', 'openmp', 'libunwind'], - script='amdgpu-offload-cmake.py --cmake-file=AMDGPULibcBot.cmake', + script='amdgpu-offload-cmake.py', + extra_args=['--cmake-file=AMDGPULibcBot.cmake'], checkout_llvm_sources=True, script_interpreter=None )}, From cf1c868ee4bd8d5e633c8440a673648af3cbdbcd Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 3 Sep 2025 11:50:25 -0700 Subject: [PATCH 122/135] [CI] Add design document for post submit testing (#512) This patch adds the design document outlining how we plan on implementing post submit testing for the premerge configuration along with the motivation and alternatives considered. --- premerge/post-submit-testing.md | 203 ++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 premerge/post-submit-testing.md diff --git a/premerge/post-submit-testing.md b/premerge/post-submit-testing.md new file mode 100644 index 000000000..04a4be1f9 --- /dev/null +++ b/premerge/post-submit-testing.md @@ -0,0 +1,203 @@ +# Post Submit Testing + +## Introduction + +While this infrastructure is focused on premerge testing, it is also important +to make sure that the specific configuration we are testing is tested post +commit as well. This document outlines the motivation for the need to test this +configuration post commit, how we plan on implementing this to ensure we get +fast feedback scalably, and why we are utilizing this design over others. 
+ +## Background/Motivation + +LLVM has two types of testing upstream: premerge and postcommit. The premerge +testing is performed using Github Actions every time a pull request (PR) is +updated before it is merged. Premerge testing is performed using this +infrastructure (specifically the `./premerge` folder in llvm-zorg). Landing a PR +consists of squashing the changes into a single commit and adding that commit to +the `main` branch in the LLVM monorepo. We care specifically about the state of +the `main` branch because it is what the community considers to be the canonical +tree. Currently, commits can also be added to the `main` branch by directly +pushing to the main branch. Commits pushed directly to `main` are not tested +through the premerge pipeline as they skip the PR merge process. After a new +commit lands in the `main` branch, postcommit testing is performed. Most +postcommit testing is performed through the Buildbot infrastructure. The main +Buildbot instance for LLVM has a web instance hosted at +[lab.llvm.org](https://lab.llvm.org/buildbot/#/builders). When a new commit +lands in `main` the Buildbot instance (sometimes referred to as the Buildbot +master) will trigger many different builds, based on the configurations +defined in the llvm-zorg repository under the `buildbot/` folder. These +configurations are run on Buildbot workers that are hosted by the community. +Some builders build too slowly to keep up with the pace of commits to `main`, +so they test batches of commits. This often results in a large number of +erroneous notifications due to the list of possible culprits for a breakage +being more than a single commit. + +For premerge testing, we do not want to notify LLVM developers about failures +already happening in `main` irrelevant to their changes. This requires knowing +the state of `main` at the time the premerge testing for a PR was started.
We +also want information on the current state of `main` to empower the community +with information that they need to revert or forward-fix problematic commits. +Problematic commits can occur without being caught by the premerge system due to +someone directly pushing a commit to `main`, or if multiple PRs become +problematic only when combined. This means we need to test the premerge +configuration postcommit as well so that we can determine the state of `main` +(in terms of whether the build passed/failed and what tests failed, if any) at +any given point in time. We can use this data to implement a "premerge advisor" +that would prevent sending notifications about build/test failures not caused by +the changes in a user's PR. + +## Design + +The LLVM Premerge system has two clusters, namely the central cluster in the +Google Cloud Platform (GCP) zone `us-central1-a` and the west cluster in the GCP +zone `us-west1`. We run two clusters in different zones for redundancy so that +if one fails, we can still run jobs on the other cluster. For postcommit +testing, we plan on setting up builders attached to the Buildbot master +described above. We will run one builder on the central cluster and one in the +west cluster. This ensures the configuration is highly available (able to +tolerate an entire cluster going down), similar to the premerge testing. The +builders will be configured to use a script that will launch testing on each +commit to `main` as if it was being run through the premerge testing pipeline, with some small but significant differences. The post submit +testing is intended to be close to the premerge configuration, but will be +different in some key ways. The differences and motivation for them are described +more thoroughly in the [testing configuration](#testing-configuration) section. +These builds will be run inside containers that are distributed onto the cluster +inside kubernetes pods (the fundamental schedulable unit inside kubernetes).
+This allows for kubernetes to handle details like what machine a build should +run on. Allowing kubernetes to handle these details also enables Google +Kubernetes Engine (GKE) to autoscale the node pools so we are not paying for +unneeded capacity. Launching builds inside pods also allows for each builder to +handle multiple builds at the same time. + +In terms of the full flow, any commit (which can be from direct pushes or +merging pull requests) pushed to the LLVM monorepo will get detected by the +buildbot master. The Buildbot master will invoke Buildbot workers running on our +clusters. These Buildbot workers will use custom builders to launch a build +wrapped in a kubernetes pod and report the results back to the buildbot master. +When the job is finished, the pod will complete and capacity will be available +for another build, or if there is nothing left to test GKE will see that there +is nothing running on one of the nodes and downscale the node pool. + +### Annotated Builder + +llvm-zorg has multiple types of builders. We plan on using an AnnotatedBuilder. +AnnotatedBuilders allow for the build to be driven using a custom python script +rather than directly dictating the shell commands that should be run to perform +the build. We need the flexibility of the AnnotatedBuilder to deploy jobs on the +cluster. AnnotatedBuilder based builders also enable deploying changes without +needing to restart the buildbot master. Without this, we have to wait for an +administrator of the LLVM buildbot master to restart it before our changes get +deployed. This could significantly delay updates or responses to incidents, +especially before the system is fully stable. + +### Build Distribution + +We want to be able to take advantage of the autoscaling functionality of the new +cluster to efficiently utilize resources. To do this, we plan on having the +AnnotatedBuilder script launch builds as kubernetes pods. 
This allows for +kubernetes to assign the builds to nodes and also allows autoscaling through the +same mechanism that Github Actions Runner Controller (ARC) uses to autoscale. +This enables us to quickly process builds at peak times and not pay for extra +capacity when commit traffic is quiet, ensuring our resource use is efficient +while still providing fast feedback. + +Using the kubernetes API inside of a python script (our AnnotatedBuilder +implementation) to launch builds does add some complexity. However, we believe +the additional complexity is justified as it allows us to achieve our goals +while maintaining efficient resource usage. + +### Testing Configuration + +By testing configuration, we mean both the environment that the tests run in, +and the set of tests that run. The testing configuration will be as close to the +premerge configuration as possible. We will be running all tests inside the same +container with the same scripts (the `monolithic-linux.sh` and +`monolithic-windows.sh` scripts) used by the premerge testing. However, there +will be one main difference between the premerge and postcommit testing +configurations. In the postcommit configuration we propose testing all projects +on every commit rather than only testing the projects that themselves changed or +had dependencies that changed. We propose this for two main reasons. Firstly, +Buildbot does not have good support for heterogeneous build configurations. This +means that testing a different set of projects within a single builder depending +upon the contents of the commit could easily cause problems. More notifications +could be produced if certain projects (that were only triggered by some files) +were failing and some were passing which would significantly increase false +positive notifications.
For example, suppose that we have three commits that +land in `main` and run through postcommit testing: commit A that touches MLIR, +commit B that touches clang-tidy, and commit C that modifies MLIR. Commit A +lands, then commit B, then commit C. If commit A introduces MLIR test failures +into an otherwise clean slate, we would see the following events: + +1. Commit A lands. Because it touches MLIR, the buildbot worker runs the MLIR + tests. Some of the tests fail. The buildbot "turns red" and a notification is + sent out to the PR author. +2. Commit B lands. Since it touches clang-tidy, the buildbot worker runs the + clang-tidy tests. All of the tests pass. The buildbot "turns green". No + notifications are sent out since everything is passing. +3. Commit C lands. Since it touches MLIR, the buildbot worker runs the MLIR + tests. The problem introduced in commit A still exists, so some tests fail. + No new tests fail. Since the buildbot was previously green due to the + interspersed clang-tidy commit, a notification is still sent out to the + author of commit C. + +By running the tests for all projects in every postsubmit test run, we avoid +the problematic situation described above. + +Another reason for running all the tests in every postsubmit run: When running +premerge tests on a PR, we also explicitly do not test certain projects even +though their dependencies change. While we do this because we suspect +interactions resulting in test failures would be quite rare, it is possible, and +having a postcommit configuration catch these rare failures would be useful. + +### Data Storage + +The hosted Buildbot master instance at [lab.llvm.org](https://lab.llvm.org) +contains results for all recent postcommit runs. We plan on querying the results +from the buildbot master because they are already available and that is where +they will natively be reported after the infrastructure is set up.
Buildbot +supports a [REST API](https://docs.buildbot.net/latest/developer/rest.html) that +would allow for easily querying the state of a commit in `main`. + +In the future, we may implement a "premerge advisor" that tells the user what +tests/build failures they can safely ignore. To do so, we need to know what is currently +failing on `main`. Each pull request is tested as if it was merged into main, +which means the commit underneath the PR is very recent. If a premerge run +fails, the premerge advisor will find the commit from `main` the PR is being +tested on. It will then query the Buildbot master using the REST API for the +status of that commit, or the preceding commits if testing for the requested +commit has not yet completed. It can then report the appropriate status to the +user. Having the status will let the premerge advisor avoid pestering LLVM +developers with failures unrelated to their changes. + +## Alternatives Considered + +Originally, we were looking at running postcommit testing through Github +Actions, like the premerge tests. This is primarily due to it being easy to +implement (a single line change in the Github Actions workflow config) and also +easy to integrate with the Github API for implementation of the premerge testing +advisor. More detailed motivation for doing postcommit testing directly +through Github is available in the +[discourse RFC thread](https://discourse.llvm.org/t/rfc-running-premerge-postcommit-through-github-actions/86124) +where we proposed doing this. We eventually decided against implementation in +this way for a couple of reasons: + +1. Nonstandard - The standard postcommit testing infrastructure for LLVM is + through Buildbot. Doing postcommit testing for the premerge configuration + through Github would represent a significant departure from this. This means + we are leaving behind some common infrastructure and are also forcing a new + unfamiliar postcommit interface on LLVM contributors. +2.
Notifications - This is the biggest issue. Github currently gives very little + control over the notifications that are sent out when the build fails or gets + cancelled. This is specifically a problem with Github sending out + notifications for build failures even if the previous build has failed. This + can easily create a lot of warning fatigue which is something we are putting + a lot of effort in to avoid. We want the premerge system to be perceived as + reliable, have people trust its results, and most importantly, have people + pay attention to failures when they do occur. They are significantly more + likely to pay attention when they are the author of the patch getting the + notification and the feedback is actionable. +3. Customization - Buildbot can be customized around issues like notifications + whereas Github cannot. Github is not particularly responsive on feature + requests and their notification story has been poor for a while, so their + lack of customization is a strategic risk. From 687a8baeb6ee539beb704599dba0b8539067d853 Mon Sep 17 00:00:00 2001 From: Omair Javaid Date: Thu, 4 Sep 2025 15:16:27 +0500 Subject: [PATCH 123/135] [ClangBuilder] Correct path of testsuite virtualenv's lnt (#592) This is a follow up to #586. Fixes 60d2141a5073ce1330bc45e4cc827344af67894b. (#576) We forgot to fix the lnt path to work on both windows and linux. On Windows, it's Scripts/lnt, on Linux it's bin/lnt. 
--- zorg/buildbot/builders/ClangBuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zorg/buildbot/builders/ClangBuilder.py b/zorg/buildbot/builders/ClangBuilder.py index 4c58abb64..9b3157d76 100644 --- a/zorg/buildbot/builders/ClangBuilder.py +++ b/zorg/buildbot/builders/ClangBuilder.py @@ -536,7 +536,7 @@ def _getClangCMakeBuildFactory( virtualenv_dir = 'Scripts' if vs else 'bin' python = InterpolateToPosixPath(f'%(prop:builddir)s/test/sandbox/{virtualenv_dir}/python') lnt_ext = '.exe' if vs else '' - lnt = InterpolateToPosixPath(f'%(prop:builddir)s/test/sandbox/Scripts/lnt{lnt_ext}') + lnt = InterpolateToPosixPath(f'%(prop:builddir)s/test/sandbox/{virtualenv_dir}/lnt{lnt_ext}') lnt_setup = InterpolateToPosixPath('%(prop:builddir)s/test/lnt/setup.py') # Paths From 6ebea64fb4750f8a5c1668a20fbdbb58403a0242 Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Thu, 4 Sep 2025 11:27:02 -0400 Subject: [PATCH 124/135] [CI] Track line diff per LLVM commit (#591) This change adds new data points to track how many lines were modified per commit, captured under a new `diff` field. This field replaces `files_modified` in our BigQuery table, as it serves the same purpose but now contains additional data regarding each file. 
--- .../llvm_commits_table_schema.json | 28 +++++++++++++++++-- .../ops-container/process_llvm_commits.py | 20 +++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/premerge/bigquery_schema/llvm_commits_table_schema.json b/premerge/bigquery_schema/llvm_commits_table_schema.json index 8d86c77e6..7d9333d3b 100644 --- a/premerge/bigquery_schema/llvm_commits_table_schema.json +++ b/premerge/bigquery_schema/llvm_commits_table_schema.json @@ -48,9 +48,31 @@ "description": "List of GitHub users who reviewed the pull request for this commit" }, { - "name": "files_modified", - "type": "STRING", + "name": "diff", + "type": "RECORD", "mode": "REPEATED", - "description": "List of filepaths modified by this commit" + "description": "List of files and line addition/deletion counts for this commit", + "fields": [ + { + "name": "file", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "additions", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "deletions", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "total", + "type": "INTEGER", + "mode": "NULLABLE" + } + ] } ] diff --git a/premerge/ops-container/process_llvm_commits.py b/premerge/ops-container/process_llvm_commits.py index 29bc1067e..317df61a8 100644 --- a/premerge/ops-container/process_llvm_commits.py +++ b/premerge/ops-container/process_llvm_commits.py @@ -19,8 +19,7 @@ # Number of days to look back for new commits # We allow some buffer time between when a commit is made and when it is queried -# for reviews. This is allow time for any events to propogate in the GitHub -# Archive BigQuery tables. +# for reviews. This is to allow time for any new GitHub events to propogate. 
LOOKBACK_DAYS = 2 # Template GraphQL subquery to check if a commit has an associated pull request @@ -57,7 +56,7 @@ class LLVMCommitInfo: commit_sha: str commit_timestamp_seconds: int - files_modified: set[str] + diff: list[dict[str, int | str]] commit_author: str = "" # GitHub username of author is unknown until API call has_pull_request: bool = False pull_request_number: int = 0 @@ -117,7 +116,15 @@ def query_for_reviews( commit.hexsha: LLVMCommitInfo( commit_sha=commit.hexsha, commit_timestamp_seconds=commit.committed_date, - files_modified=set(commit.stats.files.keys()), + diff=[ + { + "file": file, + "additions": line_stats["insertions"], + "deletions": line_stats["deletions"], + "total": line_stats["lines"], + } + for file, line_stats in commit.stats.files.items() + ], ) for commit in new_commits } @@ -210,7 +217,10 @@ def upload_daily_metrics_to_bigquery( ) table = bq_client.get_table(table_ref) commit_records = [dataclasses.asdict(commit) for commit in new_commits] - bq_client.insert_rows(table, commit_records) + errors = bq_client.insert_rows(table, commit_records) + if errors: + logging.error("Failed to upload commit info to BigQuery: %s", errors) + exit(1) def main() -> None: From 56a3e84911a369a43ee31248683148ef364ead1c Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Thu, 4 Sep 2025 20:21:41 +0000 Subject: [PATCH 125/135] [CI] Fix rounding when determining number of API batches to query When querying the GitHub API, we get a runtime error if the batch size is a multiple of the total number of commits. One extra batch is queried for 0 commits, which then raises an exception when trying to access the results. This change corrects the number of batches to query for in those cases. 
--- premerge/ops-container/process_llvm_commits.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/premerge/ops-container/process_llvm_commits.py b/premerge/ops-container/process_llvm_commits.py index 317df61a8..bc3d68e01 100644 --- a/premerge/ops-container/process_llvm_commits.py +++ b/premerge/ops-container/process_llvm_commits.py @@ -1,6 +1,7 @@ import dataclasses import datetime import logging +import math import os import git from google.cloud import bigquery @@ -144,7 +145,7 @@ def query_for_reviews( } } """ - num_batches = len(commit_subqueries) // GITHUB_API_BATCH_SIZE + 1 + num_batches = math.ceil(len(commit_subqueries) / GITHUB_API_BATCH_SIZE) logging.info("Querying GitHub GraphQL API in %d batches", num_batches) for i in range(num_batches): subquery_batch = commit_subqueries[ From 2efd4df336c9ba552810b3854a4d1810e1a5a451 Mon Sep 17 00:00:00 2001 From: dyung Date: Sat, 6 Sep 2025 00:49:08 -0400 Subject: [PATCH 126/135] Update configurations for Mac builders as well as comment descriptions for a few bots. (#585) Update descriptions for several bots that were no longer accurate. Also remove jobs properties on two of the MacOS builders to not be hardcoded. 
--- buildbot/osuosl/master/config/workers.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py index 09f7a7a52..644fb82d8 100644 --- a/buildbot/osuosl/master/config/workers.py +++ b/buildbot/osuosl/master/config/workers.py @@ -327,19 +327,19 @@ def get_all(): # Ubuntu 22.04 on AWS, x86_64 PS4 target create_worker("sie-linux-worker", properties={'jobs': 40}, max_builds=1), - # 2012 Mac Mini host, 16GB memory: - # - Ubuntu 18.04 in docker container + # Ubuntu 20.04 in docker container on AWS create_worker("doug-worker-1a", properties={'jobs': 8}, max_builds=1), - # - Ubuntu 22.04 in docker container + # Ubuntu 22.04 in docker container on 2012 Mac Mini create_worker("doug-worker-1b", properties={'jobs': 8}, max_builds=1), # Ubuntu 18.04 in docker container on Ryzen 4800U create_worker("doug-worker-2a", properties={'jobs': 16}, max_builds=1), - # Ubuntu 20.04 on AWS, AMD EPYC 7R13 shared + # Ubuntu 22.04 on AWS create_worker("sie-linux-worker2", max_builds=1), + # Ubuntu 20.04 on AWS create_worker("sie-linux-worker3", max_builds=1), # Ubuntu 22.04 on AWS, x86_64 PS5 target create_worker("sie-linux-worker4", properties={'jobs': 40}, max_builds=1), - # Ubuntu 22.04 on AWS + # Ubuntu 24.04 on AWS create_worker("sie-linux-worker5", max_builds=1), # Windows Server 2019 on AWS, x86_64 PS4 target @@ -347,9 +347,10 @@ def get_all(): # Mac target, Intel Core i7-8700B, 64GB create_worker("doug-worker-3", properties={'jobs': 12}, max_builds=1), + # Mac target, Apple M2 Pro, 32GB + create_worker("doug-worker-4", max_builds=1), # Mac target, Apple M1, 16GB - create_worker("doug-worker-4", properties={'jobs': 8}, max_builds=1), - create_worker("doug-worker-5", properties={'jobs': 8}, max_builds=1), + create_worker("doug-worker-5", max_builds=1), # Ubuntu 20.04, AMD Ryzen 5 PRO 3400GE, 32GB create_worker("doug-worker-6", properties={'jobs': 8}, max_builds=1), From 
e76e1266f79c4a691b6ff7d87f77026a907e75b9 Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Mon, 8 Sep 2025 15:56:17 +0200 Subject: [PATCH 127/135] [AMDGPU][Offload] Update builder after #136729 (#595) Co-authored-by: Joseph Huber --- buildbot/osuosl/master/config/builders.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 9eb7630f3..33b230d1a 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -2107,6 +2107,8 @@ "-DCMAKE_CXX_STANDARD=17", "-DBUILD_SHARED_LIBS=ON", "-DLIBOMPTARGET_PLUGINS_TO_BUILD=amdgpu;host", + "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES='compiler-rt;openmp'", + "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa", "-DCOMPILER_RT_BUILD_ORC=OFF", "-DCOMPILER_RT_BUILD_XRAY=OFF", "-DCOMPILER_RT_BUILD_MEMPROF=OFF", From c256d55d787576b7e05e20d137f8bd8b161d3a06 Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Tue, 9 Sep 2025 15:01:07 +0200 Subject: [PATCH 128/135] [AMDGPU][Offload] Update the way OpenMP device RTL is built (#596) --- buildbot/osuosl/master/config/builders.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 33b230d1a..626d98767 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -1962,6 +1962,8 @@ "-DLLVM_ENABLE_ASSERTIONS=ON", "-DCMAKE_C_COMPILER_LAUNCHER=ccache", "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES='compiler-rt;openmp'", + "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa", ], env={ 'HSA_ENABLE_SDMA':'0', From 188448ec6712f3ba50e1a0cbe926a6892c963bbb Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Tue, 9 Sep 2025 15:07:14 +0200 Subject: [PATCH 129/135] [AMDGPU] Add libcxx as listened-to project (#597) we are going to enable building libcxx for the device on that 
builder and hence need to listen to changes in that project. --- buildbot/osuosl/master/config/builders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 626d98767..d1c2850cf 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -2082,7 +2082,7 @@ # We would like to never collapse, but it seems the load is too high on that system to keep up. 'builddir': "openmp-offload-libc-amdgpu-runtime", 'factory' : AnnotatedBuilder.getAnnotatedBuildFactory( - depends_on_projects=['llvm', 'clang', 'compiler-rt', 'lld', 'libc', 'offload', 'openmp', 'libunwind'], + depends_on_projects=['llvm', 'clang', 'compiler-rt', 'lld', 'libc', 'libcxx', 'libcxxabi', 'offload', 'openmp', 'libunwind'], script='amdgpu-offload-cmake.py', extra_args=['--cmake-file=AMDGPULibcBot.cmake'], checkout_llvm_sources=True, From 66f19b8e40c05073976edfd3626c0839ac6eafff Mon Sep 17 00:00:00 2001 From: Galina Kistanova Date: Wed, 10 Sep 2025 18:07:49 +0000 Subject: [PATCH 130/135] Collapse doxygen build requests more aggresively. --- buildbot/osuosl/master/config/builders.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index d1c2850cf..4f9d99028 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -47,6 +47,11 @@ reload(StagedBuilder) +# Doxygen build takes a really long time. We want to collapse build requests +# more aggressively to better keep up with the changes. +def collapseRequestsDoxygen(master, builder, req1, req2): + return req1.get('reason', None) == req2.get('reason', None) + all = [ # Clang fast builders. 
@@ -2723,8 +2728,9 @@ {'name' : "publish-doxygen-docs", 'tags' : ["doc"], - 'workernames' : ["as-worker-4"], #FIXME: Temporarily disabled failing doxygen build - as-builder-8. + 'workernames' : ["as-worker-4"], 'builddir': "publish-doxygen-docs", + 'collapseRequests': collapseRequestsDoxygen, 'factory' : DoxygenDocsBuilder.getLLVMDocsBuildFactory( # Doxygen builds the final result for really # long time without any output. From a724c02e5d5ed8d9a4c62d327fbe72f3ec03164b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 12 Sep 2025 09:45:27 -0700 Subject: [PATCH 131/135] [ProfCheck] Disable Lit Internal Shell (#598) This patch disables the use of lit's internal shell for the profcheck builder. This was enabled recently in the monorepo but is causing some test failures on the profcheck buildbot that needs more investigation. Turning off the internal shell for now to give me time to investigate things. --- zorg/buildbot/builders/annotated/profcheck.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/zorg/buildbot/builders/annotated/profcheck.sh b/zorg/buildbot/builders/annotated/profcheck.sh index a040436f7..bf035f4bd 100755 --- a/zorg/buildbot/builders/annotated/profcheck.sh +++ b/zorg/buildbot/builders/annotated/profcheck.sh @@ -16,4 +16,5 @@ cmake -GNinja \ echo @@@BUILD_STEP Ninja@@@ export LIT_XFAIL="$(cat ../llvm-project/llvm/utils/profcheck-xfail.txt | tr '\n' ';')" +export LIT_USE_INTERNAL_SHELL=0 ninja check-llvm From eccf7e4cd88f537d56747e9bdde14b80fcea5073 Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Fri, 12 Sep 2025 22:52:16 -0700 Subject: [PATCH 132/135] Add new 'enable_projects' argument for getCmakeExBuildFactory factory. (#594) Added missed 'enable_projects' argument for UnifiedTreeBuilder.getCmakeExBuildFactory to pass it to the base LLVMBuildFactory class. 
--- zorg/buildbot/builders/UnifiedTreeBuilder.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/zorg/buildbot/builders/UnifiedTreeBuilder.py b/zorg/buildbot/builders/UnifiedTreeBuilder.py index 5bc79819b..ce921b7e8 100644 --- a/zorg/buildbot/builders/UnifiedTreeBuilder.py +++ b/zorg/buildbot/builders/UnifiedTreeBuilder.py @@ -612,6 +612,7 @@ def getCmakeWithNinjaMultistageBuildFactory( def getCmakeExBuildFactory( depends_on_projects = None, + enable_projects = "auto", enable_runtimes = "auto", cmake_definitions = None, cmake_options = None, @@ -666,6 +667,17 @@ def getCmakeExBuildFactory( If this parameter is not None and contains the non-runtime project names, they will go to LLVM_ENABLE_PROJECTS CMake configuration parameter. + enable_projects : list, optional + A list of the LLVM projects (except the runtime projects) for the build (default is 'auto'). + This list goes into the factory's 'enable_projects' attribute and LLVM_ENABLE_PROJECTS CMake configuration + parameter. + + If "auto" is specified, the runtime projects will be extracted from 'depends_on_projects' parameter. + + If None is specified, LLVM_ENABLE_PROJECTS will not be set for the CMake configuration step. + + (see LLVMBuildFactory for more details). + enable_runtimes : list, optional A list of the runtime project names for the build (default is 'auto'). This list goes into the factory's 'enable_runtimes' attribute and LLVM_ENABLE_RUNTIMES CMake configuration parameter. @@ -894,6 +906,7 @@ def norm_target_list_arg(lst): # Default root factory. We will collect all steps for all stages here. 
f = LLVMBuildFactory( depends_on_projects = depends_on_projects, + enable_projects = enable_projects, enable_runtimes = enable_runtimes, hint = hint, llvm_srcdir = llvm_srcdir, From e050f0922ef7f850d9ec40e0c1f6438e5fdd06ea Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 17 Sep 2025 07:38:16 +0100 Subject: [PATCH 133/135] [CI] Add check-lit to postcommit testing (#587) `check-llvm` does not run tests for the `llvm-lit` utility, so run the `check-lit` target as well. --- zorg/buildbot/builders/annotated/premerge/dispatch_job.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py index 4316230ca..994788f47 100644 --- a/zorg/buildbot/builders/annotated/premerge/dispatch_job.py +++ b/zorg/buildbot/builders/annotated/premerge/dispatch_job.py @@ -110,7 +110,7 @@ def start_build_linux(commit_sha: str, bucket_name: str, k8s_client) -> str: "export SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;flang-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-flang-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"', + './.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly check-lit" "compiler-rt;flang-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-flang-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"', "python .ci/cache_lit_timing_files.py upload", "echo BUILD FINISHED", ] @@ -135,7 +135,7 @@ def start_build_windows(commit_sha: str, bucket_name: str, k8s_client): "export 
SCCACHE_GCS_RW_MODE=READ_WRITE", "export SCCACHE_IDLE_TIMEOUT=0", "sccache --start-server", - '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-tools check-lld check-llvm check-mlir check-polly" "compiler-rt" "check-compiler-rt"', + '.ci/monolithic-windows.sh "clang;clang-tools-extra;libclc;lld;llvm;mlir;polly" "check-clang check-clang-tools check-lld check-llvm check-mlir check-polly check-lit" "compiler-rt" "check-compiler-rt"', "python .ci/cache_lit_timing_files.py upload", "echo BUILD FINISHED", ] From 10d9c753241899bb8c25402b7c1296622b1b5217 Mon Sep 17 00:00:00 2001 From: Justin Rivera Date: Wed, 24 Sep 2025 15:24:48 -0400 Subject: [PATCH 134/135] [CI] Track whether a commit is a revert (#601) This changes adds additional fields for tracking whether a commit to llvm-project is a revert and what commit/pull request it is reverting. Such commits are detecting by pattern matching against the commit message. --- .../llvm_commits_table_schema.json | 18 +++++ .../ops-container/process_llvm_commits.py | 65 +++++++++++++------ 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/premerge/bigquery_schema/llvm_commits_table_schema.json b/premerge/bigquery_schema/llvm_commits_table_schema.json index 7d9333d3b..7ddd27e14 100644 --- a/premerge/bigquery_schema/llvm_commits_table_schema.json +++ b/premerge/bigquery_schema/llvm_commits_table_schema.json @@ -47,6 +47,24 @@ "mode": "REPEATED", "description": "List of GitHub users who reviewed the pull request for this commit" }, + { + "name": "is_revert", + "type": "BOOLEAN", + "mode": "NULLABLE", + "description": "Whether or not this commit is a revert" + }, + { + "name": "pull_request_reverted", + "type": "INTEGER", + "mode": "NULLABLE", + "description": "Pull request matched in revert message. 
Not reliable for determining if a PR was reverted, `commit_reverted` may contain a commit belonging to a PR" + }, + { + "name": "commit_reverted", + "type": "STRING", + "mode": "NULLABLE", + "description": "Commit sha matched in revert message. Not reliable for determining if a commit was reverted, `pull_request_reverted` may contain a PR contributing a commit" + }, { "name": "diff", "type": "RECORD", diff --git a/premerge/ops-container/process_llvm_commits.py b/premerge/ops-container/process_llvm_commits.py index bc3d68e01..028d3b0b6 100644 --- a/premerge/ops-container/process_llvm_commits.py +++ b/premerge/ops-container/process_llvm_commits.py @@ -3,6 +3,7 @@ import logging import math import os +import re import git from google.cloud import bigquery import requests @@ -64,6 +65,9 @@ class LLVMCommitInfo: is_reviewed: bool = False is_approved: bool = False reviewers: set[str] = dataclasses.field(default_factory=set) + is_revert: bool = False + pull_request_reverted: int | None = None + commit_reverted: str | None = None def scrape_new_commits_by_date( @@ -113,26 +117,49 @@ def query_for_reviews( List of LLVMCommitInfo objects for each commit's review information. """ # Create a map of commit sha to info - new_commits = { - commit.hexsha: LLVMCommitInfo( - commit_sha=commit.hexsha, - commit_timestamp_seconds=commit.committed_date, - diff=[ - { - "file": file, - "additions": line_stats["insertions"], - "deletions": line_stats["deletions"], - "total": line_stats["lines"], - } - for file, line_stats in commit.stats.files.items() - ], - ) - for commit in new_commits - } + new_commits_info = {} + for commit in new_commits: + # Check if this commit is a revert + is_revert = ( + re.match( + r"^Revert \".*\"( \(#\d+\))?", commit.message, flags=re.IGNORECASE + ) + is not None + ) + + # Check which pull request or commit is being reverted (if any) + pull_request_match = re.search( + r"Reverts? 
(?:llvm\/llvm-project)?#(\d+)", commit.message, flags=re.IGNORECASE + ) + commit_match = re.search( + r"This reverts commit (\w+)", commit.message, flags=re.IGNORECASE + ) + pull_request_reverted = ( + int(pull_request_match.group(1)) if pull_request_match else None + ) + commit_reverted = commit_match.group(1) if commit_match else None + + # Add entry + new_commits_info[commit.hexsha] = LLVMCommitInfo( + commit_sha=commit.hexsha, + commit_timestamp_seconds=commit.committed_date, + diff=[ + { + "file": file, + "additions": line_stats["insertions"], + "deletions": line_stats["deletions"], + "total": line_stats["lines"], + } + for file, line_stats in commit.stats.files.items() + ], + is_revert=is_revert, + pull_request_reverted=pull_request_reverted, + commit_reverted=commit_reverted, + ) # Create GraphQL subqueries for each commit commit_subqueries = [] - for commit_sha in new_commits: + for commit_sha in new_commits_info: commit_subqueries.append( COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha) ) @@ -180,7 +207,7 @@ def query_for_reviews( # Amend commit information with GitHub data for commit_sha, data in api_commit_data.items(): commit_sha = commit_sha.removeprefix("commit_") - commit_info = new_commits[commit_sha] + commit_info = new_commits_info[commit_sha] commit_info.commit_author = data["author"]["user"]["login"] # If commit has no pull requests, skip it. No data to update. @@ -201,7 +228,7 @@ def query_for_reviews( # against what we want to measure, so remove them from the set of reviewers. commit_info.reviewers.discard(commit_info.commit_author) - return list(new_commits.values()) + return list(new_commits_info.values()) def upload_daily_metrics_to_bigquery( From 66523703408e43fce1c5d15fd050035e035b6363 Mon Sep 17 00:00:00 2001 From: Vladimir Vereschaka Date: Mon, 25 Aug 2025 21:32:54 -0700 Subject: [PATCH 135/135] Update the project dependencies for the lldb-remote-linux builders. 
Add additional dependencies with the libc++/libc++abi/libunwind and compiler-rt libraries. Also removed unaffected LLDB_CAN_USE_LLDB_SERVER options. --- buildbot/osuosl/master/config/builders.py | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py index 4f9d99028..28e706a80 100644 --- a/buildbot/osuosl/master/config/builders.py +++ b/buildbot/osuosl/master/config/builders.py @@ -3664,7 +3664,20 @@ def collapseRequestsDoxygen(master, builder, req1, req2): 'workernames': ["as-builder-9"], 'builddir': "lldb-remote-linux-ubuntu", 'factory': UnifiedTreeBuilder.getCmakeExBuildFactory( - depends_on_projects = ["llvm", "clang", "lld", "lldb"], + depends_on_projects = [ + 'llvm', + 'compiler-rt', + 'clang', + 'libunwind', + 'libcxx', + 'libcxxabi', + 'lld', + 'lldb', + ], + # Allow only these projects with LLVM_ENABLE_PROJECTS. + enable_projects = ["llvm", "clang", "lld", "lldb"], + # Use a proper list of runtimes (LLVM_ENABLE_RUNTIMES) from CrossWinToARMLinux.cmake. + # Avoid making it from a list of the depended projects. enable_runtimes = None, checks = [ "check-lldb-unit", @@ -3702,9 +3715,6 @@ def collapseRequestsDoxygen(master, builder, req1, req2): "LLDB_ENABLE_CURSES" : "OFF", "LLDB_ENABLE_LZMA" : "OFF", "LLDB_ENABLE_LIBXML2" : "OFF", - # No need to build lldb-server during the first stage. - # We are going to build it for the target platform later. 
- "LLDB_CAN_USE_LLDB_SERVER" : "OFF", "LLDB_TEST_USER_ARGS" : util.Interpolate( "--env;ARCH_CFLAGS=-mcpu=cortex-a78;" \ "--platform-name;remote-linux;" \ @@ -3793,7 +3803,20 @@ def collapseRequestsDoxygen(master, builder, req1, req2): 'workernames': ["as-builder-10"], 'builddir': "lldb-x-aarch64", 'factory': UnifiedTreeBuilder.getCmakeExBuildFactory( - depends_on_projects = ["llvm", "clang", "lld", "lldb"], + depends_on_projects = [ + 'llvm', + 'compiler-rt', + 'clang', + 'libunwind', + 'libcxx', + 'libcxxabi', + 'lld', + 'lldb', + ], + # Allow only these projects with LLVM_ENABLE_PROJECTS. + enable_projects = ["llvm", "clang", "lld", "lldb"], + # Use a proper list of runtimes (LLVM_ENABLE_RUNTIMES) from CrossWinToARMLinux.cmake. + # Avoid making it from a list of the depended projects. enable_runtimes = None, checks = [ "check-lldb-unit", @@ -3830,9 +3853,6 @@ def collapseRequestsDoxygen(master, builder, req1, req2): "LLDB_ENABLE_CURSES" : "OFF", "LLDB_ENABLE_LZMA" : "OFF", "LLDB_ENABLE_LIBXML2" : "OFF", - # No need to build lldb-server during the first stage. - # We are going to build it for the target platform later. - "LLDB_CAN_USE_LLDB_SERVER" : "OFF", "LLDB_TEST_USER_ARGS" : util.Interpolate( "--env;ARCH_CFLAGS=-mcpu=cortex-a78;" \ "--platform-name;remote-linux;" \