From e053fd5377ac2c75bb8f85074027b0d1a57fe63f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 17 Apr 2026 21:04:23 +0000 Subject: [PATCH 1/2] Add some additional debug logging to regalloc_trace_worker Given these workers typically run on remote machines, it's hard to tell what is going on without debug logging. Add some debug entries that I found useful when getting things spun up recently. This especially makes debugging performance issues easier as at least you can see where the issue is (e.g., copying the corpus taking an hour+ because you didn't realize the data needed to move between DCs). --- .../es/regalloc_trace/regalloc_trace_worker.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py b/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py index 86046081..33b04d6d 100644 --- a/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py +++ b/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py @@ -29,6 +29,7 @@ import shutil from typing import Any +from absl import logging import gin import tensorflow as tf @@ -82,6 +83,7 @@ def _copy_corpus(self, corpus_path: str, copy_corpus_locally_path: str | None, if tf.io.gfile.exists(copy_corpus_locally_path): return + logging.info("Starting to copy the corpus locally.") with tf.io.gfile.GFile( os.path.join(corpus_path, "corpus_description.json"), "r") as corpus_description_file: @@ -114,6 +116,7 @@ def _copy_corpus(self, corpus_path: str, copy_corpus_locally_path: str | None, for copy_future in copy_futures: if copy_future.exception() is not None: raise copy_future.exception() + logging.info("Finished creating a local copy of the corpus.") def __init__( self, @@ -150,6 +153,7 @@ def __init__( extra_bb_trace_model_flags: Extra flags to pass to the basic_block_trace_model invocation. """ + logging.info("Initializing a regalloc_trace worker.") self._clang_path = clang_path self._basic_block_trace_model_path = basic_block_trace_model_path self._thread_count = thread_count @@ -313,8 +317,12 @@ def compile_corpus_and_evaluate(self, modules: Collection[corpus.ModuleSpec], tflite_policy_path = policy_utils.convert_to_tflite( policy_as_bytes, compilation_dir, self._tf_base_policy_path) + logging.info("Building the corpus.") self.build_corpus(modules, compilation_dir, tflite_policy_path) + logging.info("Evaluating the corpus.") segment_costs = self._evaluate_corpus(compilation_dir, function_index_path, bb_trace_path) - return sum(segment_costs) + score = sum(segment_costs) + logging.info("Finished evaluating the corpus. The score was %f", score) + return sum(score) From 18860035f3ffd6a739a379947e2bebd1df280bbc Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 17 Apr 2026 22:12:14 +0000 Subject: [PATCH 2/2] fix --- compiler_opt/es/regalloc_trace/regalloc_trace_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py b/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py index 33b04d6d..65b045f6 100644 --- a/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py +++ b/compiler_opt/es/regalloc_trace/regalloc_trace_worker.py @@ -325,4 +325,4 @@ def compile_corpus_and_evaluate(self, modules: Collection[corpus.ModuleSpec], function_index_path, bb_trace_path) score = sum(segment_costs) logging.info("Finished evaluating the corpus. The score was %f", score) - return sum(score) + return score