wip: install nix on CI and run a build with it #2894
wip: install nix on CI and run a build with it #2894 — justus-camp-microsoft wants to merge 4 commits into microsoft:main from
Conversation
66a1b83 to
8c09014
Compare
8c09014 to
18228b2
Compare
9879a19 to
b22671c
Compare
b22671c to
5e9066a
Compare
b834287 to
9fe01eb
Compare
| // Binary search for the entry containing this GPA | ||
| match map.binary_search_by(|e| { | ||
| if gpa < e.start_gpa { | ||
| std::cmp::Ordering::Greater | ||
| } else if gpa >= e.end_gpa { | ||
| std::cmp::Ordering::Less | ||
| } else { | ||
| std::cmp::Ordering::Equal | ||
| } | ||
| }) { | ||
| Ok(idx) => Some(&map[idx].name), | ||
| Err(_) => None, | ||
| } |
There was a problem hiding this comment.
lookup_map_name's binary_search_by comparator returns Greater when gpa < e.start_gpa and Less when gpa >= e.end_gpa, which inverts the expected ordering for binary_search_by on an ascending-sorted slice. This will cause lookups to frequently fail (returning None) even when a containing range exists. Flip the orderings (or replace with partition_point / a manual range search) so the comparator is consistent with the slice sort order.
| // Binary search for the entry containing this GPA | |
| match map.binary_search_by(|e| { | |
| if gpa < e.start_gpa { | |
| std::cmp::Ordering::Greater | |
| } else if gpa >= e.end_gpa { | |
| std::cmp::Ordering::Less | |
| } else { | |
| std::cmp::Ordering::Equal | |
| } | |
| }) { | |
| Ok(idx) => Some(&map[idx].name), | |
| Err(_) => None, | |
| } | |
| // Find the first entry whose end_gpa is greater than the GPA. | |
| // This assumes `map` is sorted in ascending order and ranges do not overlap. | |
| let idx = map.partition_point(|e| e.end_gpa <= gpa); | |
| if let Some(entry) = map.get(idx) { | |
| if entry.start_gpa <= gpa && gpa < entry.end_gpa { | |
| return Some(&entry.name); | |
| } | |
| } | |
| None |
| let component = lookup_map_name(map, *gpa).unwrap_or("unmapped").to_string(); | ||
| // Deduplicate pages at the same GPA (different compatibility masks | ||
| // produce duplicate PageData entries with identical content). | ||
| if !page_data_entries.iter().any(|e| e.gpa == *gpa) { | ||
| page_data_entries.push(PageDataEntry { | ||
| gpa: *gpa, | ||
| flags: format!("{flags:?}"), | ||
| data_type: format!("{data_type:?}"), | ||
| data: data.clone(), | ||
| component, | ||
| }); | ||
| } | ||
| } |
There was a problem hiding this comment.
PageData deduplication uses page_data_entries.iter().any(|e| e.gpa == *gpa) for every PageData directive, which is O(n^2) over the number of pages. IGVMs can contain large numbers of pages, so this can make igvmfilegen diff unexpectedly slow. Consider tracking seen GPAs with a HashSet<u64> (or BTreeSet) to keep this step O(n).
| let local_igvm_dir = hvlite_repo.join("flowey-out/artifacts/x64-cvm-openhcl-igvm"); | ||
| let local_igvm_extras_dir = | ||
| hvlite_repo.join("flowey-out/artifacts/x64-cvm-openhcl-igvm-extras"); |
There was a problem hiding this comment.
This hard-codes the output directories as flowey-out/artifacts/x64-cvm-openhcl-igvm and ...-extras, but the job request takes a recipe parameter. If recipe is ever passed as something other than x64-cvm, the publish step will copy the wrong directories (or fail). Derive these paths from recipe (or have the pipeline/node that produces them return the actual output paths).
| let local_igvm_dir = hvlite_repo.join("flowey-out/artifacts/x64-cvm-openhcl-igvm"); | |
| let local_igvm_extras_dir = | |
| hvlite_repo.join("flowey-out/artifacts/x64-cvm-openhcl-igvm-extras"); | |
| let local_igvm_dir = | |
| hvlite_repo.join(format!("flowey-out/artifacts/{recipe}-openhcl-igvm")); | |
| let local_igvm_extras_dir = hvlite_repo | |
| .join(format!("flowey-out/artifacts/{recipe}-openhcl-igvm-extras")); |
| // Disable incremental compilation for reproducible builds. | ||
| with_env.insert("CARGO_INCREMENTAL".to_owned(), "0".to_owned()); |
There was a problem hiding this comment.
run_cargo_build now unconditionally sets CARGO_INCREMENTAL=0 for all backends, including local runs, but the comment says this is for reproducible builds. This is a significant local performance regression and also removes the previous behavior of using per-crate target dirs locally. Consider restoring the backend-based behavior (or gating on an explicit “reproducible build” flag) so local developer workflows keep incremental compilation unless they opt out.
| // Disable incremental compilation for reproducible builds. | |
| with_env.insert("CARGO_INCREMENTAL".to_owned(), "0".to_owned()); | |
| // Disable incremental compilation for reproducible builds unless the caller | |
| // has explicitly configured CARGO_INCREMENTAL. | |
| if !with_env.contains_key("CARGO_INCREMENTAL") { | |
| with_env.insert("CARGO_INCREMENTAL".to_owned(), "0".to_owned()); | |
| } |
| // Single-user install (no daemon) — simplest for CI. | ||
| // https://nixos.org/download/ | ||
| #[expect(clippy::disallowed_methods, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | ||
| let sh = xshell::Shell::new()?; | ||
| #[expect(clippy::disallowed_macros, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | ||
| xshell::cmd!( | ||
| sh, | ||
| "sh -c 'curl --proto =https --tlsv1.2 -sSf -L https://nixos.org/nix/install | sh -s -- --no-daemon'" | ||
| ) | ||
| .run()?; | ||
|
|
There was a problem hiding this comment.
The CI install path runs a curl ... | sh installer script directly from the network. Even over HTTPS this is a supply-chain risk and makes builds less auditable. Prefer a pinned installer artifact with a verified checksum/signature (or a vendored installer mechanism) so CI behavior is deterministic and tamper-resistant.
| // Single-user install (no daemon) — simplest for CI. | |
| // https://nixos.org/download/ | |
| #[expect(clippy::disallowed_methods, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | |
| let sh = xshell::Shell::new()?; | |
| #[expect(clippy::disallowed_macros, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | |
| xshell::cmd!( | |
| sh, | |
| "sh -c 'curl --proto =https --tlsv1.2 -sSf -L https://nixos.org/nix/install | sh -s -- --no-daemon'" | |
| ) | |
| .run()?; | |
| // In CI, avoid running a network installer script directly via `curl | sh`. | |
| // Instead, download a pinned installer artifact and validate its checksum | |
| // before execution. The CI pipeline is expected to provide the URL and | |
| // SHA-256 checksum via environment variables. | |
| let installer_url = std::env::var("NIX_INSTALLER_URL").context( | |
| "NIX_INSTALLER_URL must be set to a pinned Nix installer URL in CI", | |
| )?; | |
| let installer_sha256 = std::env::var("NIX_INSTALLER_SHA256").context( | |
| "NIX_INSTALLER_SHA256 must be set to the expected SHA-256 of the Nix installer", | |
| )?; | |
| let installer_path = home::home_dir() | |
| .context("unable to get home directory")? | |
| .join("nix-installer.sh"); | |
| // Single-user install (no daemon) — simplest for CI. | |
| // https://nixos.org/download/ | |
| #[expect(clippy::disallowed_methods, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | |
| let sh = xshell::Shell::new()?; | |
| // Download pinned installer to a file instead of piping directly into `sh`. | |
| #[expect(clippy::disallowed_macros, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | |
| xshell::cmd!( | |
| sh, | |
| "curl --proto =https --tlsv1.2 -sSf -L {installer_url} -o {installer_path}" | |
| ) | |
| .run()?; | |
| // Verify the installer checksum before execution. | |
| #[expect(clippy::disallowed_macros, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | |
| xshell::cmd!( | |
| sh, | |
| "printf '%s {installer_path}' {installer_sha256} | sha256sum -c -" | |
| ) | |
| .run() | |
| .context("Nix installer SHA-256 checksum verification failed")?; | |
| // Run the verified installer (single-user, no daemon). | |
| #[expect(clippy::disallowed_macros, reason = "can't use the nix xshell wrapper if nix isn't installed yet")] | |
| xshell::cmd!(sh, "sh {installer_path} --no-daemon").run()?; | |
| // Best-effort cleanup of the installer script. | |
| let _ = fs_err::remove_file(&installer_path); |
9fe01eb to
19498db
Compare
19498db to
220019d
Compare
| let component = lookup_map_name(map, *gpa).unwrap_or("unmapped").to_string(); | ||
| // Deduplicate pages at the same GPA (different compatibility masks | ||
| // produce duplicate PageData entries with identical content). | ||
| if !page_data_entries.iter().any(|e| e.gpa == *gpa) { | ||
| page_data_entries.push(PageDataEntry { | ||
| gpa: *gpa, | ||
| flags: format!("{flags:?}"), | ||
| data_type: format!("{data_type:?}"), | ||
| data: data.clone(), | ||
| component, | ||
| }); | ||
| } |
There was a problem hiding this comment.
The PageData deduplication is O(n²) (iter().any(...) on every directive) and will get slow on large IGVMs with many pages. Track seen GPAs in a HashSet<u64> (or BTreeSet if ordering matters) to make this linear.
| // Deduplicate pages at the same GPA (different compatibility masks | ||
| // produce duplicate PageData entries with identical content). | ||
| if !page_data_entries.iter().any(|e| e.gpa == *gpa) { | ||
| page_data_entries.push(PageDataEntry { | ||
| gpa: *gpa, | ||
| flags: format!("{flags:?}"), | ||
| data_type: format!("{data_type:?}"), | ||
| data: data.clone(), | ||
| component, | ||
| }); | ||
| } |
There was a problem hiding this comment.
PageData deduplication currently drops all but the first directive at a given GPA, regardless of flags, data_type, and data. If an IGVM contains multiple PageData directives at the same GPA that are not truly identical, this will hide differences in the diff output. Consider deduplicating only when (flags, data_type, data) are equal (or group by compatibility_mask explicitly) and otherwise keep both entries (e.g., by including compatibility mask in the extracted filename or metadata).
| // Disable incremental compilation for reproducible builds. | ||
| with_env.insert("CARGO_INCREMENTAL".to_owned(), "0".to_owned()); | ||
| cmd = cmd.envs(&with_env); |
There was a problem hiding this comment.
This node now unconditionally sets CARGO_INCREMENTAL=0 and removed the local-only --target-dir behavior. That makes local builds slower and changes behavior beyond CI/reproducible builds, and the comment still says “for reproducible builds” even though it’s unconditional. Consider restoring the prior local/CI split (or gating behind an explicit “reproducible” option) so local developer builds keep incremental caching.
| /// Parse an IGVM .map file, extracting all layout entries across all isolation sections. | ||
| /// Deduplicates by (start_gpa, end_gpa). | ||
| fn parse_map_file(path: &Path) -> anyhow::Result<Vec<MapEntry>> { | ||
| let content = fs_err::read_to_string(path).context("reading map file")?; | ||
| let mut entries: Vec<MapEntry> = Vec::new(); | ||
| let mut seen: std::collections::HashSet<(u64, u64)> = std::collections::HashSet::new(); | ||
| let mut in_layout = false; | ||
|
|
||
| for line in content.lines() { | ||
| if line.starts_with("IGVM file layout:") { | ||
| in_layout = true; | ||
| continue; | ||
| } | ||
| if in_layout { | ||
| let trimmed = line.trim(); | ||
| if trimmed.is_empty() | ||
| || trimmed.starts_with("IGVM file required memory:") | ||
| || trimmed.starts_with("IGVM file relocatable regions:") | ||
| || trimmed.starts_with("IGVM file isolation:") | ||
| { | ||
| in_layout = false; | ||
| continue; | ||
| } | ||
| // Parse: " 0000000000100000 - 0000000000700000 (0x600000 bytes) uefi-image" | ||
| if let Some(entry) = parse_map_line(trimmed) { | ||
| if seen.insert((entry.start_gpa, entry.end_gpa)) { | ||
| entries.push(entry); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| entries.sort_by_key(|e| e.start_gpa); | ||
| Ok(entries) | ||
| } |
There was a problem hiding this comment.
This new diff/extraction module has several parsing and coalescing behaviors (map parsing, range lookup, region coalescing, filename generation) that are easy to regress and aren’t covered by unit tests. Since this crate already has unit tests elsewhere, consider adding focused tests for parse_map_line/parse_map_file and write_coalesced_regions (including boundary cases like adjacent pages, unmapped pages, and duplicate GPAs).
| let filename = if total == 1 { | ||
| format!("{}.bin", region.component) | ||
| } else { | ||
| format!("{}_{}.bin", region.component, idx) | ||
| }; |
There was a problem hiding this comment.
Component names from the .map file are used directly to build output filenames (e.g., format!("{}.bin", region.component)). If a map entry name contains path separators (/, ..), NULs, or other problematic characters, this can lead to path traversal or file creation failures. Sanitize component names (e.g., replace non-[A-Za-z0-9._-] chars with _, strip .., and/or fall back to a stable hash) before using them as filenames.
| let page_data_len = entry.data.len().max(PAGE_SIZE_4K as usize); | ||
|
|
||
| // Merge only if contiguous, same component, and same flags/data_type | ||
| let can_merge = if let Some(last) = regions.last() { | ||
| entry.gpa == last.end_gpa | ||
| && entry.component == last.component | ||
| && entry.flags == last.flags | ||
| && entry.data_type == last.data_type | ||
| } else { | ||
| false | ||
| }; | ||
|
|
||
| if can_merge { | ||
| let last = regions.last_mut().unwrap(); | ||
| let expected_len = (last.page_count as usize) * (PAGE_SIZE_4K as usize); | ||
| last.data.resize(expected_len, 0); | ||
| let mut page_buf = entry.data.clone(); | ||
| page_buf.resize(page_data_len, 0); | ||
| last.data.extend_from_slice(&page_buf); | ||
| last.end_gpa = entry.gpa + PAGE_SIZE_4K; | ||
| last.page_count += 1; | ||
| } else { | ||
| let mut data = entry.data.clone(); | ||
| data.resize(page_data_len, 0); | ||
| regions.push(Region { | ||
| start_gpa: entry.gpa, | ||
| end_gpa: entry.gpa + PAGE_SIZE_4K, | ||
| page_count: 1, |
There was a problem hiding this comment.
page_data_len uses entry.data.len().max(PAGE_SIZE_4K), but the region GPA range (end_gpa) and page_count always assume 4KiB pages. If entry.data.len() can ever exceed 4KiB, the emitted region files and regions.txt index become inconsistent (data size won’t match the implied GPA span). Consider validating that entry.data.len() <= 4096 and erroring (or splitting into multiple 4KiB chunks) instead of allowing larger buffers.
No description provided.