From e135f65eca67494e9ae327708b91f86de5717709 Mon Sep 17 00:00:00 2001
From: David Zbarsky
Date: Tue, 21 Oct 2025 14:44:00 -0400
Subject: [PATCH 1/2] Add a repository rule for efficient sysroots

---
 MODULE.bazel          | 13 ++++++++
 tests/MODULE.bazel    | 11 ++----
 tests/WORKSPACE       | 30 ++++++++++++-----
 toolchain/sysroot.bzl | 78 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 115 insertions(+), 17 deletions(-)
 create mode 100644 toolchain/sysroot.bzl

diff --git a/MODULE.bazel b/MODULE.bazel
index 5c4657ee..254bf6b7 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -21,9 +21,22 @@ module(
 
 bazel_dep(name = "bazel_features", version = "1.36.0")
 bazel_dep(name = "bazel_skylib", version = "1.5.0")
+bazel_dep(name = "aspect_bazel_lib", version = "2.0.0")
 bazel_dep(name = "rules_cc", version = "0.2.2")
 bazel_dep(name = "platforms", version = "0.0.8")
 bazel_dep(name = "helly25_bzl", version = "0.1.2")
+bazel_dep(name = "tar.bzl", version = "0.6.0")
 
+tar_toolchains = use_extension("@tar.bzl//tar:extensions.bzl", "toolchains")
+use_repo(
+    tar_toolchains,
+    "bsd_tar_toolchains_darwin_amd64",
+    "bsd_tar_toolchains_darwin_arm64",
+    "bsd_tar_toolchains_linux_amd64",
+    "bsd_tar_toolchains_linux_arm64",
+    "bsd_tar_toolchains_windows_amd64",
+    "bsd_tar_toolchains_windows_arm64",
+)
+
 # TODO: Remove when protobuf is released with a version of rules_python that supports 8.x
 bazel_dep(name = "rules_python", version = "1.0.0", dev_dependency = True)
diff --git a/tests/MODULE.bazel b/tests/MODULE.bazel
index 2bbd8815..13ef0ed1 100644
--- a/tests/MODULE.bazel
+++ b/tests/MODULE.bazel
@@ -253,18 +253,11 @@ libclang_rt_wasm32 = use_repo_rule("//wasm:wasi_sdk.bzl", "libclang_rt_wasm32")
 
 libclang_rt_wasm32(name = "libclang_rt_wasm32")
 
-http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+sysroot = use_repo_rule("@toolchains_llvm//toolchain:sysroot.bzl", "sysroot")
 
 # This sysroot is used by github.com/vsco/bazel-toolchains.
-http_archive(
+sysroot(
     name = "org_chromium_sysroot_linux_x64",
-    build_file_content = """
-filegroup(
-    name = "sysroot",
-    srcs = glob(["*/**"]),
-    visibility = ["//visibility:public"],
-)
-""",
     sha256 = "84656a6df544ecef62169cfe3ab6e41bb4346a62d3ba2a045dc5a0a2ecea94a3",
     urls = ["https://commondatastorage.googleapis.com/chrome-linux-sysroot/toolchain/2202c161310ffde63729f29d27fe7bb24a0bc540/debian_stretch_amd64_sysroot.tar.xz"],
 )
diff --git a/tests/WORKSPACE b/tests/WORKSPACE
index 940fb00a..56aefc54 100644
--- a/tests/WORKSPACE
+++ b/tests/WORKSPACE
@@ -20,6 +20,25 @@ local_repository(
 )
 
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+    name = "aspect_bazel_lib",
+    sha256 = "c4f36285ceed51f75da44ffcf8fa393794d0dc2e273a2e03be50462e347740cd",
+    strip_prefix = "bazel-lib-2.0.0",
+    url = "https://github.com/aspect-build/bazel-lib/releases/download/v2.0.0/bazel-lib-v2.0.0.tar.gz",
+)
+
+http_archive(
+    name = "tar.bzl",
+    sha256 = "a147d473a359742db2a43c8a9a8e04e31321582e6bb669dafc5ba6b2c59845d1",
+    strip_prefix = "tar.bzl-0.6.0",
+    url = "https://github.com/bazel-contrib/tar.bzl/releases/download/v0.6.0/tar.bzl-v0.6.0.tar.gz",
+)
+
+load("@tar.bzl//tar:extensions.bzl", "create_repositories")
+
+create_repositories()
+
 load("@toolchains_llvm//toolchain:deps.bzl", "bazel_toolchain_dependencies")
 
 bazel_toolchain_dependencies()
@@ -114,15 +133,10 @@ llvm_toolchain(
 ## Toolchain example with a sysroot.
 
 # This sysroot is used by github.com/vsco/bazel-toolchains.
-http_archive(
+load("@toolchains_llvm//toolchain:sysroot.bzl", "sysroot")
+
+sysroot(
     name = "org_chromium_sysroot_linux_x64",
-    build_file_content = """
-filegroup(
-    name = "sysroot",
-    srcs = glob(["*/**"]),
-    visibility = ["//visibility:public"],
-)
-""",
     sha256 = "84656a6df544ecef62169cfe3ab6e41bb4346a62d3ba2a045dc5a0a2ecea94a3",
     urls = ["https://commondatastorage.googleapis.com/chrome-linux-sysroot/toolchain/2202c161310ffde63729f29d27fe7bb24a0bc540/debian_stretch_amd64_sysroot.tar.xz"],
 )
diff --git a/toolchain/sysroot.bzl b/toolchain/sysroot.bzl
new file mode 100644
index 00000000..264c4d31
--- /dev/null
+++ b/toolchain/sysroot.bzl
@@ -0,0 +1,78 @@
+load("@aspect_bazel_lib//lib:repo_utils.bzl", "repo_utils")
+
+def _sysroot_impl(rctx):
+    urls = rctx.attr.urls
+    if rctx.attr.url:
+        urls = [rctx.attr.url] + urls
+
+    if not urls:
+        fail("At least one of url and urls must be provided")
+
+    _, _, archive = urls[0].rpartition("/")
+
+    rctx.download(urls, archive, sha256 = rctx.attr.sha256)
+
+    # Sysroot handling has assumptions about the filegroup's package matching the sysroot directory,
+    # but provide an alias to handle the existing usage of the `//:sysroot` target.
+    rctx.file(
+        "BUILD.bazel",
+        """alias(
+    name = "sysroot",
+    actual = "//sysroot",
+    visibility = ["//visibility:public"],
+)""",
+    )
+
+    # Declare the sysroot files as a source directory so they can be
+    # optimized in the Merkle tree cache more effectively.
+    # Also, create the BUILD file before extracting because `bsdtar` expects the target
+    # directory to exist, and this way Bazel creates it for us without needing `mkdir`.
+    rctx.file(
+        "sysroot/BUILD.bazel",
+        """filegroup(
+    name = "sysroot",
+    srcs = ["."],
+    visibility = ["//visibility:public"],
+)""",
+    )
+
+    host_bsdtar = Label("@bsd_tar_toolchains_%s//:tar" % repo_utils.platform(rctx))
+
+    cmd = [
+        str(rctx.path(host_bsdtar)),
+        "--extract",
+        "--no-same-owner",
+        "--no-same-permissions",
+        "--file",
+        archive,
+        "--directory",
+        "sysroot",
+    ]
+
+    for include in rctx.attr.include_patterns:
+        cmd.extend(["--include", include])
+
+    for exclude in rctx.attr.exclude_patterns:
+        cmd.extend(["--exclude", exclude])
+
+    result = rctx.execute(cmd)
+    if result.return_code != 0:
+        fail(result.stdout + result.stderr)
+
+    rctx.delete(archive)
+
+    if hasattr(rctx, "repo_metadata"):
+        return rctx.repo_metadata(reproducible = bool(rctx.attr.sha256))  # a download without a checksum is not reproducible
+    else:
+        return None
+
+sysroot = repository_rule(
+    implementation = _sysroot_impl,
+    attrs = {
+        "url": attr.string(),
+        "urls": attr.string_list(),
+        "sha256": attr.string(),
+        "include_patterns": attr.string_list(),
+        "exclude_patterns": attr.string_list(),
+    },
+)

From 1cf2c5ae5e79f0fb3b79be107a7b4785cb2a2e72 Mon Sep 17 00:00:00 2001
From: David Zbarsky
Date: Fri, 24 Oct 2025 14:03:42 -0400
Subject: [PATCH 2/2] sysroot module map

---
 tests/MODULE.bazel                       |  2 +-
 tests/WORKSPACE                          |  2 +-
 toolchain/internal/configure.bzl         | 27 ++++++++++++++++-----------
 toolchain/internal/system_module_map.bzl | 22 +++++++++++++++++++++-
 toolchain/internal/template.modulemap    |  3 ++-
 toolchain/sysroot.bzl                    | 16 ++++------------
 6 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/tests/MODULE.bazel b/tests/MODULE.bazel
index 13ef0ed1..4107779b 100644
--- a/tests/MODULE.bazel
+++ b/tests/MODULE.bazel
@@ -171,7 +171,7 @@ llvm.toolchain_root(
 )
 llvm.sysroot(
     name = "llvm_toolchain_with_sysroot",
-    label = "@org_chromium_sysroot_linux_x64//:sysroot",
+    label = "@org_chromium_sysroot_linux_x64//sysroot",
     targets = ["linux-x86_64"],
 )
 use_repo(llvm, "llvm_toolchain_with_sysroot")
diff --git a/tests/WORKSPACE b/tests/WORKSPACE
index 56aefc54..f49111b1 100644
--- a/tests/WORKSPACE
+++ b/tests/WORKSPACE
@@ -145,7 +145,7 @@ llvm_toolchain(
     name = "llvm_toolchain_with_sysroot",
     llvm_versions = LLVM_VERSIONS,
     sysroot = {
-        "linux-x86_64": "@org_chromium_sysroot_linux_x64//:sysroot",
+        "linux-x86_64": "@org_chromium_sysroot_linux_x64//sysroot",
     },
     # We can share the downloaded LLVM distribution with the first configuration.
     toolchain_roots = {
diff --git a/toolchain/internal/configure.bzl b/toolchain/internal/configure.bzl
index fba75541..9816b677 100644
--- a/toolchain/internal/configure.bzl
+++ b/toolchain/internal/configure.bzl
@@ -320,7 +320,7 @@ def _cc_toolchain_str(
     sysroot_path = toolchain_info.sysroot_paths_dict.get(target_pair)
     sysroot_label = toolchain_info.sysroot_labels_dict.get(target_pair)
     if sysroot_label:
-        sysroot_label_str = "\"%s\"" % str(sysroot_label)
+        sysroot_label_str = repr(str(sysroot_label))
     else:
         sysroot_label_str = ""
 
@@ -466,6 +466,11 @@ filegroup(
 
     if use_absolute_paths_llvm:
         template = template + """
+filegroup(
+    name = "cxx_builtin_include_files-{suffix}",
+    srcs = [],
+)
+
 filegroup(
     name = "compiler-components-{suffix}",
     srcs = [
@@ -499,10 +504,17 @@ filegroup(name = "strip-files-{suffix}", srcs = [{extra_files_str}])
     else:
         template = template + """
 filegroup(
-    name = "compiler-components-{suffix}",
+    name = "cxx_builtin_include_files-{suffix}",
     srcs = [
         "{llvm_dist_label_prefix}clang",
         "{llvm_dist_label_prefix}include",
+    ],
+)
+
+filegroup(
+    name = "compiler-components-{suffix}",
+    srcs = [
+        ":cxx_builtin_include_files-{suffix}",
         ":sysroot-components-{suffix}",
         {extra_compiler_files}
     ],
@@ -539,18 +551,11 @@ filegroup(name = "strip-files-{suffix}", srcs = ["{llvm_dist_label_prefix}strip"
 """
 
     template = template + """
-filegroup(
-    name = "include-components-{suffix}",
-    srcs = [
-        ":compiler-components-{suffix}",
-        ":sysroot-components-{suffix}",
-    ],
-)
-
 system_module_map(
     name = "module-{suffix}",
-    cxx_builtin_include_files = ":include-components-{suffix}",
+    cxx_builtin_include_files = ":cxx_builtin_include_files-{suffix}",
     cxx_builtin_include_directories = {cxx_builtin_include_directories},
+    sysroot_files = ":sysroot-components-{suffix}",
     sysroot_path = "{sysroot_path}",
 )
 
diff --git a/toolchain/internal/system_module_map.bzl b/toolchain/internal/system_module_map.bzl
index 92571fd9..8b38a8db 100644
--- a/toolchain/internal/system_module_map.bzl
+++ b/toolchain/internal/system_module_map.bzl
@@ -57,12 +57,31 @@ def _system_module_map(ctx):
 
     template_dict = ctx.actions.template_dict()
     template_dict.add_joined(
-        "%textual_headers%",
+        "%cxx_builtin_include_files%",
         ctx.attr.cxx_builtin_include_files[DefaultInfo].files,
         join_with = "\n",
         map_each = textual_header_closure,
         allow_closure = True,
     )
+
+    # We don't have a good way to detect a source directory, so check if it's a single File...
+    sysroot_files = ctx.attr.sysroot_files[DefaultInfo].files.to_list()
+    if len(sysroot_files) == 1:
+        path = paths.normalize(sysroot_files[0].path).replace("//", "/")
+        template_dict.add("%sysroot%", _umbrella_submodule(execroot_prefix + path))
+    else:
+        # An empty list means no sysroot files were supplied (presumably no sysroot label is
+        # configured), so only warn when a sysroot was given as many individual files.
+        if sysroot_files:
+            print("WARNING: Sysroot {} did not resolve to a single (directory) file. Consider using the `sysroot` repository rule in @toolchains_llvm//toolchain:sysroot.bzl for more efficient builds.".format(ctx.attr.sysroot_files.label))  # buildifier: disable=print
+        template_dict.add_joined(
+            "%sysroot%",
+            ctx.attr.sysroot_files[DefaultInfo].files,
+            join_with = "\n",
+            map_each = textual_header_closure,
+            allow_closure = True,
+        )
+
     template_dict.add_joined(
         "%umbrella_submodules%",
         depset(absolute_path_dirs),
@@ -87,6 +106,7 @@ system_module_map = rule(
     attrs = {
         "cxx_builtin_include_files": attr.label(mandatory = True),
         "cxx_builtin_include_directories": attr.string_list(mandatory = True),
+        "sysroot_files": attr.label(mandatory = True),
         "sysroot_path": attr.string(),
         "_module_map_template": attr.label(
             default = "template.modulemap",
diff --git a/toolchain/internal/template.modulemap b/toolchain/internal/template.modulemap
index 65811e0b..c1f63983 100644
--- a/toolchain/internal/template.modulemap
+++ b/toolchain/internal/template.modulemap
@@ -1,4 +1,5 @@
 module "crosstool" [system] {
-%textual_headers%
+%cxx_builtin_include_files%
+%sysroot%
 %umbrella_submodules%
 }
diff --git a/toolchain/sysroot.bzl b/toolchain/sysroot.bzl
index 264c4d31..83e4ed3a 100644
--- a/toolchain/sysroot.bzl
+++ b/toolchain/sysroot.bzl
@@ -12,19 +12,11 @@ def _sysroot_impl(rctx):
 
     rctx.download(urls, archive, sha256 = rctx.attr.sha256)
 
-    # Sysroot handling has assumptions about the filegroup's package matching the sysroot directory,
-    # but provide an alias to handle the existing usage of the `//:sysroot` target.
-    rctx.file(
-        "BUILD.bazel",
-        """alias(
-    name = "sysroot",
-    actual = "//sysroot",
-    visibility = ["//visibility:public"],
-)""",
-    )
+    # Source directories are more efficient than file groups for 2 reasons:
+    # - They can be symlinked into a local sandbox with a single symlink instead of 1-per-file
+    # - They serve as a signal to the Merkle tree cache machinery since they can be memoized as a single node.
+    # Since sysroots are usually a ton of files, it can improve build performance to declare them as source directories.
 
-    # Declare the sysroot files as a source directory so they can be
-    # optimized in the Merkle tree cache more effectively.
     # Also, create the BUILD file before extracting because `bsdtar` expects the target
     # directory to exist, and this way Bazel creates it for us without needing `mkdir`.
     rctx.file(