From 908315d47fde35bb9b3d501bbc3bd1cc66c4100d Mon Sep 17 00:00:00 2001 From: Mihai Date: Tue, 11 Feb 2025 16:11:42 +0200 Subject: [PATCH] introduce structured hints --- Cargo.lock | 1 + ceno_zkvm/Cargo.toml | 1 + ceno_zkvm/src/bin/e2e.rs | 48 +++++++++++++++++++++++++++---- ceno_zkvm/src/e2e.rs | 61 ++++------------------------------------ 4 files changed, 50 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76fac0dfe..39c9b2a21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -353,6 +353,7 @@ dependencies = [ "base64", "bincode", "ceno_emul", + "ceno_host", "cfg-if", "clap", "criterion", diff --git a/ceno_zkvm/Cargo.toml b/ceno_zkvm/Cargo.toml index 97a7c9364..4bfdb96da 100644 --- a/ceno_zkvm/Cargo.toml +++ b/ceno_zkvm/Cargo.toml @@ -20,6 +20,7 @@ serde_json.workspace = true base64 = "0.22" ceno_emul = { path = "../ceno_emul" } +ceno_host = { path = "../ceno_host" } ff_ext = { path = "../ff_ext" } mpcs = { path = "../mpcs" } multilinear_extensions = { version = "0", path = "../multilinear_extensions" } diff --git a/ceno_zkvm/src/bin/e2e.rs b/ceno_zkvm/src/bin/e2e.rs index f1da01d51..9384cc6ca 100644 --- a/ceno_zkvm/src/bin/e2e.rs +++ b/ceno_zkvm/src/bin/e2e.rs @@ -1,4 +1,5 @@ use ceno_emul::{IterAddresses, Program, WORD_SIZE, Word}; +use ceno_host::CenoStdin; use ceno_zkvm::{ e2e::{Checkpoint, Preset, run_e2e_with_checkpoint, setup_platform}, with_panic_hook, @@ -48,8 +49,16 @@ struct Args { /// Hints: prover-private unconstrained input. /// This is a raw file mapped as a memory segment. /// Zero-padded to the right to the next power-of-two size. - #[arg(long)] - hints: Option, + #[arg(long, conflicts_with = "structured_hints")] + raw_hints: Option, + + /// Hints: prover-private unconstrained input. + /// This is a file containing decimal representations of + /// a value N, followed by N u32 integers. + /// The N integers are processed so they can be read + /// directly by guest programs. + #[arg(long, conflicts_with = "raw_hints")] + structured_hints: Option, /// Stack size in bytes. #[arg(long, default_value = "32k", value_parser = parse_size)] @@ -120,8 +129,19 @@ fn main() { args.heap_size ); - tracing::info!("Loading hints file: {:?}", args.hints); - let hints = memory_from_file(&args.hints); + let (hints, filename) = if args.raw_hints.is_some() { + (read_raw_hints(&args.raw_hints), args.raw_hints) + } else if args.structured_hints.is_some() { + ( + read_structured_hints(&args.structured_hints), + args.structured_hints, + ) + } else { + (vec![], None) + }; + + tracing::info!("Loading hints file: {:?}", filename); + assert!( hints.len() <= platform.hints.iter_addresses().len(), "hints must fit in {} bytes", @@ -187,7 +207,7 @@ fn main() { } }; } -fn memory_from_file(path: &Option) -> Vec { +fn read_raw_hints(path: &Option) -> Vec { path.as_ref() .map(|path| { let mut buf = fs::read(path).expect("could not read file"); @@ -198,3 +218,21 @@ fn memory_from_file(path: &Option) -> Vec { }) .unwrap_or_default() } + +/// Reads a sequence of u32s and formats it as guest input +fn read_structured_hints(path: &Option) -> Vec { + let structured_hints = + fs::read_to_string(path.as_ref().unwrap()).expect("could not read structured hints file"); + + let mut parts = structured_hints.split_whitespace(); + let n: usize = parts.next().unwrap().parse().expect("could not parse N"); + let values: Vec = parts + .take(n) + .map(|part| part.parse().expect("could not parse hint")) + .collect(); + + // Serialize the read values into the suitable format + let mut input = CenoStdin::default(); + input.write(&values).unwrap(); + (&input).into() +} diff --git a/ceno_zkvm/src/e2e.rs b/ceno_zkvm/src/e2e.rs index 31cf9d55c..e44721b7d 100644 --- a/ceno_zkvm/src/e2e.rs +++ b/ceno_zkvm/src/e2e.rs @@ -14,18 +14,14 @@ use crate::{ tables::{MemFinalRecord, MemInitRecord, ProgramTableCircuit, ProgramTableConfig}, }; use ceno_emul::{ - ByteAddr, CENO_PLATFORM, EmuContext, InsnKind, IterAddresses, Platform, Program, StepRecord, - Tracer, VMState, WORD_SIZE, WordAddr, + CENO_PLATFORM, EmuContext, InsnKind, IterAddresses, Platform, Program, StepRecord, Tracer, + VMState, WORD_SIZE, WordAddr, }; use clap::ValueEnum; use ff_ext::ExtensionField; -use itertools::{Itertools, MinMaxResult, chain}; +use itertools::{Itertools, chain}; use mpcs::PolynomialCommitmentScheme; -use std::{ - collections::{BTreeSet, HashMap, HashSet}, - iter::zip, - sync::Arc, -}; +use std::{collections::BTreeSet, iter::zip, sync::Arc}; use transcript::BasicTranscript as Transcript; pub struct FullMemState { @@ -129,7 +125,7 @@ fn emulate_program( } }) .collect_vec(); - debug_memory_ranges(&vm, &mem_final); + // debug_memory_ranges(&vm, &mem_final); // Find the final public IO cycles. let io_final = io_init @@ -548,50 +544,3 @@ pub fn run_e2e_verify>( None => tracing::error!("Unfinished execution. max_steps={:?}.", max_steps), } } - -fn debug_memory_ranges(vm: &VMState, mem_final: &[MemFinalRecord]) { - let accessed_addrs = vm - .tracer() - .final_accesses() - .iter() - .filter(|(_, &cycle)| (cycle != 0)) - .map(|(&addr, _)| addr.baddr()) - .filter(|addr| vm.platform().can_read(addr.0)) - .collect_vec(); - - let handled_addrs = mem_final - .iter() - .filter(|rec| rec.cycle != 0) - .map(|rec| ByteAddr(rec.addr)) - .collect::>(); - - tracing::debug!( - "Memory range (accessed): {:?}", - format_segments(vm.platform(), accessed_addrs.iter().copied()) - ); - tracing::debug!( - "Memory range (handled): {:?}", - format_segments(vm.platform(), handled_addrs.iter().copied()) - ); - - for addr in &accessed_addrs { - assert!(handled_addrs.contains(addr), "unhandled addr: {:?}", addr); - } -} - -fn format_segments( - platform: &Platform, - addrs: impl Iterator, -) -> HashMap> { - addrs - .into_grouping_map_by(|addr| format_segment(platform, addr.0)) - .minmax() -} - -fn format_segment(platform: &Platform, addr: u32) -> String { - format!( - "{}{}", - if platform.can_read(addr) { "R" } else { "-" }, - if platform.can_write(addr) { "W" } else { "-" }, - ) -}