From 36fc043264893d4e1f65327a48f205a70fc8aa2b Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Wed, 27 May 2026 22:36:30 -0500 Subject: [PATCH] feat(python): support vfs-backed open --- Cargo.lock | 38 +++-- README.md | 8 +- crates/bashkit/Cargo.toml | 2 +- crates/bashkit/docs/hooks.md | 4 +- crates/bashkit/docs/python.md | 34 +++-- crates/bashkit/docs/threat-model.md | 4 +- crates/bashkit/src/builtins/python.rs | 131 ++++++++++++++++-- crates/bashkit/src/tool.rs | 4 +- .../integration/python_integration_tests.rs | 96 +++++++++++++ .../integration/python_security_tests.rs | 4 +- .../tests/integration/threat_model_tests.rs | 6 +- deny.toml | 2 +- specs/python-builtin.md | 19 ++- specs/threat-model.md | 19 +-- supply-chain/audits.toml | 24 ++++ supply-chain/imports.lock | 37 ++++- 16 files changed, 356 insertions(+), 76 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fc6e60b9..784fc9923 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1745,9 +1745,9 @@ dependencies = [ [[package]] name = "get-size-derive2" -version = "0.7.4" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b6d1e2f75c16bfbcd0f95d84f99858a6e2f885c2287d1f5c3a96e8444a34b4" +checksum = "c9f8ab1b98a1284961d722ce994d9a0f3018ab1917618d4113824a72b9f71bc9" dependencies = [ "attribute-derive", "quote", @@ -1756,15 +1756,16 @@ dependencies = [ [[package]] name = "get-size2" -version = "0.7.4" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cf31a6d70300cf81461098f7797571362387ef4bf85d32ac47eaa59b3a5a1a" +checksum = "b0cd0777a1057362cab35a779e0d79dacecb8d73e2c733eaafeb7ea917b08f03" dependencies = [ "compact_str", "get-size-derive2", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "ordermap", "smallvec", + "thin-vec", ] [[package]] @@ -2442,9 +2443,9 @@ dependencies = [ [[package]] name = "jiter" -version = "0.13.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"020ba671987d7444d251d3ee5340be1bf4606cd6c0b53e6f4066b5a1ee376b22" +checksum = "7866f284df68dbc242796251ed9768bde2b58ebc4d7d32cc07cc7fd993e3ed6c" dependencies = [ "ahash", "bitvec", @@ -2811,10 +2812,9 @@ dependencies = [ [[package]] name = "monty" version = "0.0.17" -source = "git+https://github.com/pydantic/monty?rev=49faa4c#49faa4c8ae94490eae286e76259adcc35f64f0a5" +source = "git+https://github.com/pydantic/monty?rev=9b5f478#9b5f478288e8c1c0198ec4af60049738f93652c1" dependencies = [ "ahash", - "bytemuck", "chrono", "fancy-regex 0.17.0", "hashbrown 0.16.1", @@ -4200,7 +4200,7 @@ dependencies = [ [[package]] name = "ruff_python_ast" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=6ded4bed1651e30b34dd04cdaa50c763036abb0d#6ded4bed1651e30b34dd04cdaa50c763036abb0d" +source = "git+https://github.com/samuelcolvin/ruff.git?rev=6aaa91ac2b269df1414954ccd5134f0e6f5c6d30#6aaa91ac2b269df1414954ccd5134f0e6f5c6d30" dependencies = [ "aho-corasick", "bitflags", @@ -4212,13 +4212,14 @@ dependencies = [ "ruff_source_file", "ruff_text_size", "rustc-hash", + "thin-vec", "thiserror 2.0.18", ] [[package]] name = "ruff_python_parser" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=6ded4bed1651e30b34dd04cdaa50c763036abb0d#6ded4bed1651e30b34dd04cdaa50c763036abb0d" +source = "git+https://github.com/samuelcolvin/ruff.git?rev=6aaa91ac2b269df1414954ccd5134f0e6f5c6d30#6aaa91ac2b269df1414954ccd5134f0e6f5c6d30" dependencies = [ "bitflags", "bstr", @@ -4230,6 +4231,7 @@ dependencies = [ "ruff_text_size", "rustc-hash", "static_assertions", + "thin-vec", "unicode-ident", "unicode-normalization", "unicode_names2", @@ -4238,7 +4240,7 @@ dependencies = [ [[package]] name = "ruff_python_stdlib" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=6ded4bed1651e30b34dd04cdaa50c763036abb0d#6ded4bed1651e30b34dd04cdaa50c763036abb0d" +source = 
"git+https://github.com/samuelcolvin/ruff.git?rev=6aaa91ac2b269df1414954ccd5134f0e6f5c6d30#6aaa91ac2b269df1414954ccd5134f0e6f5c6d30" dependencies = [ "bitflags", "unicode-ident", @@ -4247,7 +4249,7 @@ dependencies = [ [[package]] name = "ruff_python_trivia" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=6ded4bed1651e30b34dd04cdaa50c763036abb0d#6ded4bed1651e30b34dd04cdaa50c763036abb0d" +source = "git+https://github.com/samuelcolvin/ruff.git?rev=6aaa91ac2b269df1414954ccd5134f0e6f5c6d30#6aaa91ac2b269df1414954ccd5134f0e6f5c6d30" dependencies = [ "itertools 0.14.0", "ruff_source_file", @@ -4258,7 +4260,7 @@ dependencies = [ [[package]] name = "ruff_source_file" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=6ded4bed1651e30b34dd04cdaa50c763036abb0d#6ded4bed1651e30b34dd04cdaa50c763036abb0d" +source = "git+https://github.com/samuelcolvin/ruff.git?rev=6aaa91ac2b269df1414954ccd5134f0e6f5c6d30#6aaa91ac2b269df1414954ccd5134f0e6f5c6d30" dependencies = [ "memchr", "ruff_text_size", @@ -4267,7 +4269,7 @@ dependencies = [ [[package]] name = "ruff_text_size" version = "0.0.0" -source = "git+https://github.com/astral-sh/ruff.git?rev=6ded4bed1651e30b34dd04cdaa50c763036abb0d#6ded4bed1651e30b34dd04cdaa50c763036abb0d" +source = "git+https://github.com/samuelcolvin/ruff.git?rev=6aaa91ac2b269df1414954ccd5134f0e6f5c6d30#6aaa91ac2b269df1414954ccd5134f0e6f5c6d30" dependencies = [ "get-size2", ] @@ -5226,6 +5228,12 @@ dependencies = [ "unicode-width 0.2.2", ] +[[package]] +name = "thin-vec" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f7e269b48f0a7dd0146680fa24b50cc67fc0373f086a5b2f99bd084639b482" + [[package]] name = "thiserror" version = "1.0.69" diff --git a/README.md b/README.md index bd7375978..05fe27948 100644 --- a/README.md +++ b/README.md @@ -341,17 +341,17 @@ bash.exec("python3 -c \"print(2 ** 10)\"").await?; // Script files from VFS bash.exec("python3 
/tmp/script.py").await?; -// VFS bridging: pathlib.Path operations work with the virtual filesystem +// VFS bridging: open() and pathlib.Path work with the virtual filesystem bash.exec(r#"python3 -c " -from pathlib import Path -Path('/tmp/data.txt').write_text('hello from python') +with open('/tmp/data.txt', 'w') as f: + f.write('hello from python') ""#).await?; bash.exec("cat /tmp/data.txt").await?; // "hello from python" ``` Stdlib modules: `math`, `pathlib`, `os` (getenv/environ), `sys`, `typing`. Security note: `re` is intentionally disabled due to regex backtracking DoS risk. -Limitations: no `open()` (use `pathlib.Path`), no network, no classes, no third-party imports. +Limitations: file I/O is VFS-scoped, no network, no classes, no third-party imports. See [crates/bashkit/docs/python.md](crates/bashkit/docs/python.md) for the full guide. ## Experimental: TypeScript Support diff --git a/crates/bashkit/Cargo.toml b/crates/bashkit/Cargo.toml index 5c8fe73fb..0a1486700 100644 --- a/crates/bashkit/Cargo.toml +++ b/crates/bashkit/Cargo.toml @@ -98,7 +98,7 @@ unit-prefix = "0.5" os_display = "0.1.3" # Embedded Python interpreter (optional) -monty = { git = "https://github.com/pydantic/monty", rev = "49faa4c", optional = true } +monty = { git = "https://github.com/pydantic/monty", rev = "9b5f478", optional = true } # Embedded TypeScript interpreter (optional) zapcode-core = { version = "1.5", optional = true } diff --git a/crates/bashkit/docs/hooks.md b/crates/bashkit/docs/hooks.md index 274c282f6..0ba28ea0b 100644 --- a/crates/bashkit/docs/hooks.md +++ b/crates/bashkit/docs/hooks.md @@ -243,7 +243,7 @@ boundary stays in bashkit — hooks cannot bypass the allowlist. 
### `before_http` — Filter or Modify Requests -```rust +```rust,ignore use bashkit::{Bash, NetworkAllowlist, hooks::{HookAction, HttpRequestEvent}}; # fn main() { @@ -266,7 +266,7 @@ let bash = Bash::builder() ### `after_http` — Observe Responses -```rust +```rust,ignore use bashkit::{Bash, NetworkAllowlist, hooks::{HookAction, HttpResponseEvent}}; use std::sync::{Arc, Mutex}; diff --git a/crates/bashkit/docs/python.md b/crates/bashkit/docs/python.md index 4d062e657..3e6b2137e 100644 --- a/crates/bashkit/docs/python.md +++ b/crates/bashkit/docs/python.md @@ -73,16 +73,15 @@ echo "print('piped')" | python3 ## Virtual Filesystem (VFS) Bridging -Python `pathlib.Path` operations are bridged to Bashkit's virtual filesystem. -Files created by bash are readable from Python and vice versa. +Python `open()` and `pathlib.Path` operations are bridged to Bashkit's virtual +filesystem. Files created by bash are readable from Python and vice versa. ### Bash → Python ```bash echo "important data" > /tmp/shared.txt python3 -c " -from pathlib import Path -content = Path('/tmp/shared.txt').read_text() +content = open('/tmp/shared.txt').read() print(f'Got: {content.strip()}') " ``` @@ -91,16 +90,20 @@ print(f'Got: {content.strip()}') ```bash python3 -c " -from pathlib import Path -_ = Path('/tmp/result.txt').write_text('computed by python\n') +with open('/tmp/result.txt', 'w') as f: + _ = f.write('computed by python\n') " cat /tmp/result.txt ``` -### Supported Path Operations +### Supported File Operations | Operation | Example | |-----------|---------| +| Open/read | `open('f.txt').read()` | +| Open/write | `open('f.txt', 'w').write('data')` | +| Open/append | `open('f.txt', 'a').write('more')` | +| Path open | `Path('f.txt').open('r')` | | Read text | `Path('f.txt').read_text()` | | Read bytes | `Path('f.txt').read_bytes()` | | Write text | `Path('f.txt').write_text('data')` | @@ -117,7 +120,7 @@ cat /tmp/result.txt ### Architecture ```text -Python code → Monty VM → 
OsCall(ReadText, path) → Bashkit VFS → resume +Python code → Monty VM → OsCall(Open/ReadText, path) → Bashkit VFS → resume ``` Monty pauses at filesystem operations, Bashkit bridges them to the VFS, then @@ -169,19 +172,20 @@ let help = tool.help(); // Includes a Markdown Notes section with Python hints ``` The builtin's `llm_hint()` is automatically included in the tool's documentation, -so LLMs know not to generate code using `open()`, HTTP requests, or classes. +so LLMs know file I/O is VFS-scoped and HTTP requests/classes are unavailable. ## Limitations -**No `open()` builtin.** Monty does not implement Python's `open()`. Use `pathlib.Path` instead: +**VFS-only file I/O.** `open()` and `pathlib.Path` read/write Bashkit's virtual +filesystem, not the host filesystem: ```python -# Won't work: -# f = open('data.txt') - -# Use instead: from pathlib import Path -content = Path('data.txt').read_text() + +with open('/tmp/data.txt', 'w') as f: + f.write('hello') + +content = Path('/tmp/data.txt').read_text() ``` **No HTTP/network.** No `socket`, `urllib`, `requests`, or `http.client` modules. diff --git a/crates/bashkit/docs/threat-model.md b/crates/bashkit/docs/threat-model.md index eb83d26bf..9434f0c02 100644 --- a/crates/bashkit/docs/threat-model.md +++ b/crates/bashkit/docs/threat-model.md @@ -554,7 +554,7 @@ attacks: ### Python / Monty Security (TM-PY-*) The `python`/`python3` builtins embed the Monty Python interpreter with VFS bridging. -Python `pathlib.Path` operations are bridged to Bashkit's virtual filesystem. +Python `pathlib.Path` and `open()` operations are bridged to Bashkit's virtual filesystem. | Threat | Attack Example | Mitigation | Status | |--------|---------------|------------|--------| @@ -562,7 +562,7 @@ Python `pathlib.Path` operations are bridged to Bashkit's virtual filesystem. 
| Memory exhaustion (TM-PY-002) | Large allocation | Monty max_memory (64MB) + max_allocations (1M) | MITIGATED | | Stack overflow (TM-PY-003) | Deep recursion | Monty max_recursion (200) | MITIGATED | | Shell escape (TM-PY-004) | `os.system()` | Monty has no os.system/subprocess | MITIGATED | -| Real FS access (TM-PY-005) | `open()` | Monty has no open() builtin | MITIGATED | +| Real FS access (TM-PY-005) | `open()` | VFS bridge opens only Bashkit VFS files | MITIGATED | | Error info leak (TM-PY-006) | Errors go to stdout | Errors go to stderr, not stdout | MITIGATED | | Real FS read (TM-PY-015) | `Path.read_text()` | VFS bridge reads only from Bashkit VFS | MITIGATED | | Real FS write (TM-PY-016) | `Path.write_text()` | VFS bridge writes only to Bashkit VFS | MITIGATED | diff --git a/crates/bashkit/src/builtins/python.rs b/crates/bashkit/src/builtins/python.rs index 6a82df4cc..bd04f3c90 100644 --- a/crates/bashkit/src/builtins/python.rs +++ b/crates/bashkit/src/builtins/python.rs @@ -9,17 +9,21 @@ //! # Overview //! //! Virtual Python execution with resource limits and VFS access. -//! Python `pathlib.Path` operations are bridged to Bashkit's virtual filesystem -//! via Monty's OsCall pause/resume mechanism. No real filesystem or network access. +//! Python `pathlib.Path` operations and `open()` file handles are bridged to +//! Bashkit's virtual filesystem via Monty's OsCall pause/resume mechanism. +//! No real filesystem or network access. +//! +//! Decision: `open()` performs only VFS open-time effects and returns Monty's +//! virtual file handle. Bashkit never holds host/native Python file handles. //! //! Supports: `python -c "code"`, `python script.py`, stdin piping. 
use async_trait::async_trait; use chrono::{Datelike, Timelike}; use monty::{ - ExcType, ExtFunctionResult, LimitedTracker, MontyDate, MontyDateTime, MontyException, - MontyObject, MontyRun, NameLookupResult, OsFunction, PrintWriter, ResourceLimits, RunProgress, - dir_stat, file_stat, symlink_stat, + ExcType, ExtFunctionResult, FileMode, LimitedTracker, MontyDate, MontyDateTime, MontyException, + MontyFileHandle, MontyObject, MontyRun, NameLookupResult, OsFunction, PrintWriter, + ResourceLimits, RunProgress, dir_stat, file_stat, symlink_stat, }; use std::collections::HashMap; use std::future::Future; @@ -181,8 +185,9 @@ impl std::fmt::Debug for PythonExternalFns { /// The python/python3 builtin command. /// /// Executes Python code using the embedded Monty interpreter (pydantic/monty). -/// Python `pathlib.Path` operations are bridged to Bashkit's VFS — files -/// created by bash (`cat > file`) are readable from Python, and vice versa. +/// Python `pathlib.Path` and `open()` operations are bridged to Bashkit's VFS +/// — files created by bash (`cat > file`) are readable from Python, and vice +/// versa. /// /// # Usage /// @@ -192,7 +197,7 @@ impl std::fmt::Debug for PythonExternalFns { /// echo "print('hello')" | python3 /// python3 -c "2 + 2" # expression result printed /// python3 --version -/// python3 -c "from pathlib import Path; print(Path('/tmp/f.txt').read_text())" +/// python3 -c "print(open('/tmp/f.txt').read())" /// ``` pub struct Python { /// Resource limits for the Monty interpreter. @@ -306,7 +311,7 @@ impl Builtin for Python { Some( "python/python3: Embedded Python (Monty). \ Stdlib: math, pathlib, os.getenv, sys, typing. \ - File I/O via pathlib.Path only (no open()). \ + File I/O via pathlib.Path and open() against the VFS. \ No HTTP/network. No classes. 
No third-party imports.", ) } @@ -631,6 +636,20 @@ async fn handle_os_call( }; match function { + OsFunction::Open => { + let mode = match parse_open_mode(args) { + Ok(mode) => mode, + Err(err) => return err, + }; + match open_vfs_file(&path, mode, fs).await { + Ok(()) => ExtFunctionResult::Return(MontyObject::FileHandle(MontyFileHandle { + path: path.to_string_lossy().to_string(), + mode, + position: 0, + })), + Err(e) => map_vfs_error(e, &path), + } + } OsFunction::Exists => { let exists = fs.exists(&path).await.unwrap_or(false); ExtFunctionResult::Return(MontyObject::Bool(exists)) @@ -674,7 +693,10 @@ async fn handle_os_call( )); } }; - let len = content.len(); + let len = match args.get(1) { + Some(MontyObject::String(s)) => s.chars().count(), + _ => 0, + }; match fs.write_file(&path, &content).await { Ok(()) => ExtFunctionResult::Return(MontyObject::Int(len as i64)), Err(e) => map_vfs_error(e, &path), @@ -696,6 +718,41 @@ async fn handle_os_call( Err(e) => map_vfs_error(e, &path), } } + OsFunction::AppendText => { + let content = match args.get(1) { + Some(MontyObject::String(s)) => s.as_bytes().to_vec(), + _ => { + return ExtFunctionResult::Error(MontyException::new( + ExcType::TypeError, + Some("append_text() requires a string argument".into()), + )); + } + }; + let len = match args.get(1) { + Some(MontyObject::String(s)) => s.chars().count(), + _ => 0, + }; + match append_vfs_file(&path, &content, fs).await { + Ok(()) => ExtFunctionResult::Return(MontyObject::Int(len as i64)), + Err(e) => map_vfs_error(e, &path), + } + } + OsFunction::AppendBytes => { + let content = match args.get(1) { + Some(MontyObject::Bytes(b)) => b.clone(), + _ => { + return ExtFunctionResult::Error(MontyException::new( + ExcType::TypeError, + Some("append_bytes() requires a bytes argument".into()), + )); + } + }; + let len = content.len(); + match append_vfs_file(&path, &content, fs).await { + Ok(()) => ExtFunctionResult::Return(MontyObject::Int(len as i64)), + Err(e) => 
map_vfs_error(e, &path), + } + } OsFunction::Mkdir => { let parents = get_bool_kwarg(kwargs, "parents").unwrap_or(false); let exist_ok = get_bool_kwarg(kwargs, "exist_ok").unwrap_or(false);
@@ -783,10 +840,91 @@ async fn handle_os_call(
 fn extract_path(args: &[MontyObject], cwd: &Path) -> Option<PathBuf> {
     match args.first()? {
         MontyObject::Path(s) | MontyObject::String(s) => Some(resolve_python_path(s, cwd)),
+        MontyObject::FileHandle(handle) => Some(resolve_python_path(&handle.path, cwd)),
         _ => None,
     }
 }
 
+/// Parse the mode argument (`args[1]`) of an `open()` OsCall into a `FileMode`.
+///
+/// Yields a `TypeError` result when the mode is absent or is not a string, and
+/// a `ValueError` result carrying the parser's message when the string is not
+/// a mode that `FileMode` accepts.
+fn parse_open_mode(args: &[MontyObject]) -> std::result::Result<FileMode, ExtFunctionResult> {
+    let mode = match args.get(1) {
+        Some(MontyObject::String(mode)) => mode,
+        other => {
+            // Tell "no mode passed" apart from "mode of the wrong type".
+            let msg = if other.is_some() {
+                "open() mode must be a string"
+            } else {
+                "open() missing mode argument"
+            };
+            return Err(ExtFunctionResult::Error(MontyException::new(
+                ExcType::TypeError,
+                Some(msg.into()),
+            )));
+        }
+    };
+
+    mode.parse::<FileMode>().map_err(|msg| {
+        ExtFunctionResult::Error(MontyException::new(
+            ExcType::ValueError,
+            Some(msg.into_owned()),
+        ))
+    })
+}
+
+/// Apply the open-time effects of `open(path, mode)` to the VFS.
+///
+/// - read modes: `path` must `stat` successfully and must not be a directory;
+/// - write modes: `path` is written as an empty file;
+/// - append modes: an existing non-directory is left untouched, and a path
+///   that does not exist is written as an empty file.
+async fn open_vfs_file(path: &Path, mode: FileMode, fs: &Arc<dyn FileSystem>) -> Result<()> {
+    match mode {
+        FileMode::Read(_) | FileMode::ReadUpdate(_) => reject_directory(path, fs).await,
+        FileMode::Write(_) | FileMode::WriteUpdate(_) => fs.write_file(path, b"").await,
+        FileMode::Append(_) | FileMode::AppendUpdate(_) => {
+            // A failed existence probe must propagate: treating it as
+            // "missing" would fall through to `write_file(b"")` and truncate
+            // a file that append mode is required to preserve.
+            if fs.exists(path).await? {
+                reject_directory(path, fs).await
+            } else {
+                fs.write_file(path, b"").await
+            }
+        }
+    }
+}
+
+/// Return an "is a directory" error when `path` stats as a directory.
+/// Errors from `stat` itself propagate unchanged.
+async fn reject_directory(path: &Path, fs: &Arc<dyn FileSystem>) -> Result<()> {
+    if fs.stat(path).await?.file_type.is_dir() {
+        return Err(std::io::Error::other("is a directory").into());
+    }
+    Ok(())
+}
+
+/// Append `content` to the file at `path`, creating it when it does not exist.
+///
+/// Implemented as a `read_file` followed by a `write_file` of the combined
+/// bytes, i.e. two separate VFS calls.
+async fn append_vfs_file(path: &Path, content: &[u8], fs: &Arc<dyn FileSystem>) -> Result<()> {
+    // Probe existence explicitly instead of matching substrings of the
+    // error text ("not found" / "No such file"), which breaks as soon as a
+    // VFS backend words its error differently.
+    // NOTE(review): if `FileSystem` exposes a native append, prefer it — confirm.
+    let mut existing = if fs.exists(path).await? {
+        fs.read_file(path).await?
+    } else {
+        Vec::new()
+    };
+    existing.extend_from_slice(content);
+    fs.write_file(path, &existing).await
+}
+
 /// Resolve a Python
path string against cwd if relative. fn resolve_python_path(path_str: &str, cwd: &Path) -> PathBuf { let p = Path::new(path_str); diff --git a/crates/bashkit/src/tool.rs b/crates/bashkit/src/tool.rs index 240177779..d28958f02 100644 --- a/crates/bashkit/src/tool.rs +++ b/crates/bashkit/src/tool.rs @@ -1856,8 +1856,8 @@ mod tests { let helptext = tool.help(); assert!(helptext.contains("python"), "help should mention python"); assert!( - helptext.contains("no open()"), - "help should document open() limitation" + helptext.contains("open() against the VFS"), + "help should document VFS-scoped open() support" ); assert!( helptext.contains("No HTTP"), diff --git a/crates/bashkit/tests/integration/python_integration_tests.rs b/crates/bashkit/tests/integration/python_integration_tests.rs index 68454e840..b2c8f9a58 100644 --- a/crates/bashkit/tests/integration/python_integration_tests.rs +++ b/crates/bashkit/tests/integration/python_integration_tests.rs @@ -679,6 +679,102 @@ mod vfs_bridging { assert_eq!(r.stdout, "roundtrip\n"); } + #[tokio::test] + async fn open_reads_vfs_file() { + let mut bash = bash_python(); + bash.exec("printf 'from vfs' > /tmp/open-read.txt") + .await + .unwrap(); + let r = bash + .exec("python3 -c \"with open('/tmp/open-read.txt', 'r') as f:\n print(f.read())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "from vfs\n"); + } + + #[tokio::test] + async fn open_writes_vfs_file() { + let mut bash = bash_python(); + let r = bash + .exec( + "python3 -c \"with open('/tmp/open-write.txt', 'w') as f:\n print(f.write('from open'))\"\ncat /tmp/open-write.txt", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "9\nfrom open"); + } + + #[tokio::test] + async fn open_write_text_returns_character_count() { + let mut bash = bash_python(); + let r = bash + .exec( + "python3 -c \"with open('/tmp/open-unicode.txt', 'w') as f:\n print(f.write('βeta'))\"\nwc -c /tmp/open-unicode.txt", + ) + .await + 
.unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.starts_with("4\n")); + assert!(r.stdout.contains("5 /tmp/open-unicode.txt")); + } + + #[tokio::test] + async fn path_open_writes_vfs_file() { + let mut bash = bash_python(); + let r = bash + .exec( + "python3 -c \"from pathlib import Path\nwith Path('/tmp/path-open.txt').open('w') as f:\n f.write('via path')\"\ncat /tmp/path-open.txt", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "via path"); + } + + #[tokio::test] + async fn open_append_text_preserves_existing_content() { + let mut bash = bash_python(); + bash.exec("printf 'first' > /tmp/open-append.txt") + .await + .unwrap(); + let r = bash + .exec( + "python3 -c \"with open('/tmp/open-append.txt', 'a') as f:\n f.write('+second')\"\ncat /tmp/open-append.txt", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "first+second"); + } + + #[tokio::test] + async fn open_append_text_creates_missing_file() { + let mut bash = bash_python(); + let r = bash + .exec( + "python3 -c \"with open('/tmp/open-append-new.txt', 'a') as f:\n f.write('created')\"\ncat /tmp/open-append-new.txt", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "created"); + } + + #[tokio::test] + async fn open_append_bytes_preserves_existing_content() { + let mut bash = bash_python(); + let r = bash + .exec( + "python3 -c \"open('/tmp/open-append.bin', 'wb').write(b'\\x00\\x01')\nopen('/tmp/open-append.bin', 'ab').write(b'\\x02')\nprint(len(open('/tmp/open-append.bin', 'rb').read()))\"", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "3\n"); + } + #[tokio::test] async fn path_exists() { let mut bash = bash_python(); diff --git a/crates/bashkit/tests/integration/python_security_tests.rs b/crates/bashkit/tests/integration/python_security_tests.rs index d0d7c0e0e..af9991297 100644 --- a/crates/bashkit/tests/integration/python_security_tests.rs +++ 
b/crates/bashkit/tests/integration/python_security_tests.rs @@ -192,13 +192,13 @@ mod blackbox_builtins { } #[tokio::test] - async fn no_open_builtin() { + async fn open_builtin_stays_in_vfs() { let mut bash = bash_python(); let r = bash .exec("python3 -c \"f = open('/etc/passwd', 'r')\nprint(f.read())\"") .await .unwrap(); - assert_ne!(r.exit_code, 0, "open() should not be available"); + assert_ne!(r.exit_code, 0, "open() must not read the host filesystem"); assert!(!r.stdout.contains("root:")); } diff --git a/crates/bashkit/tests/integration/threat_model_tests.rs b/crates/bashkit/tests/integration/threat_model_tests.rs index f13247feb..7bf0b4be9 100644 --- a/crates/bashkit/tests/integration/threat_model_tests.rs +++ b/crates/bashkit/tests/integration/threat_model_tests.rs @@ -1340,12 +1340,12 @@ mod python_security { async fn threat_python_no_filesystem() { let mut bash = bash_with_python(); - // open() builtin should not be available (Monty doesn't expose it) + // open() resolves through Bashkit VFS, not the host filesystem. 
let result = bash .exec("python3 -c \"f = open('/etc/passwd')\nprint(f.read())\"") .await .unwrap(); - assert_ne!(result.exit_code, 0, "file open should fail"); + assert_ne!(result.exit_code, 0, "host file open should fail"); assert!( !result.stdout.contains("root:"), "Should not read real /etc/passwd" @@ -2189,6 +2189,7 @@ mod builtin_parser_depth { } /// TM-DOS-027: Deeply nested JSON input to jq must not crash + #[cfg(feature = "jq")] #[tokio::test] async fn threat_jq_deep_json_nesting_safe() { let mut bash = Bash::new(); @@ -2222,6 +2223,7 @@ mod builtin_parser_depth { } /// TM-DOS-027: Moderate nesting in jq still works + #[cfg(feature = "jq")] #[tokio::test] async fn threat_jq_moderate_nesting_works() { let mut bash = Bash::new(); diff --git a/deny.toml b/deny.toml index c26ecfc6e..1af44c56d 100644 --- a/deny.toml +++ b/deny.toml @@ -53,5 +53,5 @@ unknown-git = "deny" # Allow git sources for Monty (not yet on crates.io) and its ruff deps allow-git = [ "https://github.com/pydantic/monty", - "https://github.com/astral-sh/ruff", + "https://github.com/samuelcolvin/ruff", ] diff --git a/specs/python-builtin.md b/specs/python-builtin.md index a04122510..29f8c1345 100644 --- a/specs/python-builtin.md +++ b/specs/python-builtin.md @@ -124,7 +124,7 @@ Monty implements a subset of Python 3.12: - String operations, f-strings - Exception handling: try/except/finally/raise - Property descriptors (`@property`) (since Monty 0.0.4) -- Built-in functions: print, len, range, enumerate, zip, map, filter, sorted, reversed, sum, min, max, abs, round, int, float, str, bool, list, dict, tuple, set, type, isinstance, hasattr, getattr, id, repr, ord, chr, hex, oct, bin, all, any, input +- Built-in functions: print, len, range, enumerate, zip, map, filter, sorted, reversed, sum, min, max, abs, round, int, float, str, bool, list, dict, tuple, set, type, isinstance, hasattr, getattr, id, repr, ord, chr, hex, oct, bin, all, any, input, open - Standard modules: sys, typing, math (~50 
functions), pathlib, os (getenv/environ), json, datetime - `datetime.date.today()`, `datetime.datetime.now()` with optional timezone (since Monty 0.0.11) - JSON: `json.dumps()`, `json.loads()` (since Monty 0.0.9) @@ -135,25 +135,23 @@ Monty implements a subset of Python 3.12: - Classes (planned upstream) - Match statements - Import of third-party libraries -- `open()` builtin — Monty does not implement `open()`. Use `pathlib.Path` instead: - `Path('f.txt').read_text()` not `open('f.txt').read()` - HTTP/network I/O — no `socket`, `urllib`, `requests`, `http.client` modules. Monty has no OsCall variants for network operations, so there is no way to bridge these. - Most standard library modules ### VFS Bridging -Python `pathlib.Path` operations are bridged to Bashkit's virtual filesystem -via Monty's OsCall pause/resume mechanism. This enables Python code to read -and write files that are shared with the bash environment. +Python `pathlib.Path` and `open()` operations are bridged to Bashkit's virtual +filesystem via Monty's OsCall pause/resume mechanism. This enables Python code +to read and write files that are shared with the bash environment. 
```bash # Write from bash, read from Python echo "data" > /tmp/shared.txt -python3 -c "from pathlib import Path; print(Path('/tmp/shared.txt').read_text())" +python3 -c "print(open('/tmp/shared.txt').read())" # Write from Python, read from bash -python3 -c "from pathlib import Path; Path('/tmp/out.txt').write_text('hello\n')" +python3 -c "with open('/tmp/out.txt', 'w') as f: f.write('hello\n')" cat /tmp/out.txt # Create directories, check existence @@ -168,6 +166,7 @@ python3 -c "import os; print(os.getenv('HOME'))" **Supported operations:** - `Path.read_text()`, `Path.read_bytes()` — read from VFS - `Path.write_text()`, `Path.write_bytes()` — write to VFS +- `open()`, `Path.open()` — read, write, and append files in the VFS - `Path.exists()`, `Path.is_file()`, `Path.is_dir()`, `Path.is_symlink()` - `Path.mkdir()` (with `parents=True`, `exist_ok=True` kwargs) - `Path.unlink()`, `Path.rmdir()` — delete from VFS @@ -181,7 +180,7 @@ python3 -c "import os; print(os.getenv('HOME'))" **Architecture:** ``` -Python code → Monty VM → OsCall(ReadText, path) → Bashkit VFS → resume +Python code → Monty VM → OsCall(Open/ReadText, path) → Bashkit VFS → resume ``` Monty pauses execution at filesystem operations, yields an `OsCall` event @@ -310,7 +309,7 @@ Relative paths are resolved against the shell's cwd. Path traversal via When Python is registered via `BashToolBuilder::python()`, the builtin contributes a hint to `help()` and `system_prompt()` documenting its limitations: -> python/python3: Embedded Python (Monty). Stdlib: math, pathlib, os.getenv, sys, typing. File I/O via pathlib.Path only (no open()). No HTTP/network. No classes. No third-party imports. +> python/python3: Embedded Python (Monty). Stdlib: math, pathlib, os.getenv, sys, typing. File I/O via pathlib.Path and open() against the VFS. No HTTP/network. No classes. No third-party imports. Regex module `re` is intentionally disabled in Bashkit due to catastrophic backtracking DoS risk in untrusted code execution. 
diff --git a/specs/threat-model.md b/specs/threat-model.md index 2c84290f9..14785b697 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -1666,9 +1666,9 @@ The following components are fuzz-tested for robustness: > runtime. This integration should be treated as experimental. Bashkit embeds the Monty Python interpreter (pydantic/monty) with VFS bridging. -Python `pathlib.Path` operations are bridged to Bashkit's virtual filesystem via -Monty's OsCall pause/resume mechanism. This section covers threats specific to -the Python builtin. +Python `pathlib.Path` and `open()` operations are bridged to Bashkit's virtual +filesystem via Monty's OsCall pause/resume mechanism. This section covers +threats specific to the Python builtin. ### Architecture @@ -1676,8 +1676,8 @@ the Python builtin. Python code → Monty VM → OsCall pause → Bashkit VFS bridge → resume ``` -Monty never touches the real filesystem. All `Path.*` operations yield `OsCall` -events that Bashkit intercepts and dispatches to the VFS. +Monty never touches the real filesystem. All `Path.*` and `open()` operations +yield `OsCall` events that Bashkit intercepts and dispatches to the VFS. ### Threats @@ -1687,7 +1687,7 @@ events that Bashkit intercepts and dispatches to the VFS. 
| TM-PY-002 | Memory exhaustion via large allocation | High | Monty max_memory (64MB) + max_allocations (1M) | `threat_python_memory_exhaustion` | | TM-PY-003 | Stack overflow via deep recursion | High | Monty max_recursion (200) + parser depth limit (200, since 0.0.4) | `threat_python_recursion_bomb` | | TM-PY-004 | Shell escape via os.system/subprocess | Critical | Monty has no os.system/subprocess implementation | `threat_python_no_os_operations` | -| TM-PY-005 | Real filesystem access via open() | Critical | Monty has no open() builtin | `threat_python_no_filesystem` | +| TM-PY-005 | Real filesystem access via open() | Critical | VFS bridge opens only Bashkit VFS files, not host files | `threat_python_no_filesystem` | | TM-PY-006 | Error info leakage via stdout | Medium | Errors go to stderr, not stdout | `threat_python_error_isolation` | | TM-PY-015 | Real filesystem read via pathlib | Critical | VFS bridge reads only from Bashkit VFS, not host | `threat_python_vfs_no_real_fs` | | TM-PY-016 | Real filesystem write via pathlib | Critical | VFS bridge writes only to Bashkit VFS | `threat_python_vfs_write_sandboxed` | @@ -1740,10 +1740,11 @@ and this information has low sensitivity. No filesystem or network access is gra ### VFS Bridge Security Properties -1. **No real filesystem access**: All Path operations go through Bashkit's VFS. +1. **No real filesystem access**: All Path and open operations go through Bashkit's VFS. `/etc/passwd` in Python reads from VFS, not the host. 2. **Shared VFS with bash**: Files written by `echo > file` are readable by - Python's `Path(file).read_text()`, and vice versa. This is intentional. + Python's `open(file).read()` / `Path(file).read_text()`, and vice versa. + This is intentional. 3. **Path resolution**: Relative paths are resolved against the shell's cwd. Path traversal (`../..`) is constrained by VFS path normalization. 4. 
**Error mapping**: VFS errors are mapped to standard Python exceptions @@ -1766,10 +1767,12 @@ filesystem. | Path.is_file() | fs.stat() | bool | | Path.is_dir() | fs.stat() | bool | | Path.is_symlink() | fs.stat() | bool | +| open(), Path.open() | fs.stat() / fs.write_file() | file handle | | Path.read_text() | fs.read_file() | str | | Path.read_bytes() | fs.read_file() | bytes | | Path.write_text() | fs.write_file() | int | | Path.write_bytes() | fs.write_file() | int | +| file.write() append mode | fs.read_file() + fs.write_file() | int | | Path.mkdir() | fs.mkdir() | None | | Path.unlink() | fs.remove() | None | | Path.rmdir() | fs.remove() | None | diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index 5be5cc26b..1b8d4f3d4 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -53,6 +53,18 @@ who = "Mykhailo Chalyi " criteria = "safe-to-deploy" version = "2.4.0" +[[audits.get-size-derive2]] +who = "Mykhailo Chalyi " +criteria = "safe-to-deploy" +delta = "0.7.4 -> 0.9.0" +notes = "Reviewed delta: published crate changes only version metadata; derive macro source is unchanged and introduces no new generated code behavior or ambient capabilities." + +[[audits.get-size2]] +who = "Mykhailo Chalyi " +criteria = "safe-to-deploy" +delta = "0.7.4 -> 0.9.0" +notes = "Reviewed delta: adds optional GetSize impls for dashmap, half, and parking_lot plus test refactors; no filesystem/process/network capabilities, and lock access is limited to normal Mutex/RwLock guards." 
+ [[audits.hybrid-array]] who = "Mykhailo Chalyi " criteria = "safe-to-deploy" @@ -63,6 +75,12 @@ who = "Mykhailo Chalyi " criteria = "safe-to-deploy" version = "0.7.12" +[[audits.jiter]] +who = "Mykhailo Chalyi " +criteria = "safe-to-deploy" +delta = "0.13.0 -> 0.15.0" +notes = "Reviewed delta: number parsing API refactor, NumberFloat export, Rust 2024 explicit unsafe blocks, and string-cache allocation moved off stack; no new filesystem/process/network capabilities, existing SIMD/PyString unsafe paths remain bounded by parser checks and ASCII preconditions." + [[audits.napi]] who = "Mykhailo Chalyi " criteria = "safe-to-deploy" @@ -104,6 +122,12 @@ criteria = "safe-to-deploy" delta = "0.103.12 -> 0.103.13" notes = "Reviewed delta: fixes BIT STRING/CRL parsing edge cases and preserves fail-closed URI constraint handling; no new unsafe code or ambient capability changes." +[[audits.thin-vec]] +who = "Mykhailo Chalyi " +criteria = "safe-to-deploy" +delta = "0.2.16 -> 0.2.18" +notes = "Reviewed delta: no_std feature compatibility fixes and std-to-core path changes in gecko-ffi code; no new allocation semantics, unsafe behavior, or ambient capabilities." 
+ [[audits.tokio]] who = "Mykhailo Chalyi " criteria = "safe-to-deploy" diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index 398cbd161..8fa9ad8d3 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -90,7 +90,7 @@ who = "Manish Goregaokar " criteria = "safe-to-deploy" user-id = 1139 # Manish Goregaokar (Manishearth) start = "2019-11-06" -end = "2026-02-01" +end = "2027-04-23" notes = "All code written or reviewed by Manish" aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" @@ -108,7 +108,7 @@ who = "Manish Goregaokar " criteria = "safe-to-deploy" user-id = 1139 # Manish Goregaokar (Manishearth) start = "2019-07-25" -end = "2026-02-01" +end = "2027-04-23" notes = "All code written or reviewed by Manish" aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" @@ -314,6 +314,39 @@ criteria = "safe-to-deploy" delta = "0.16.1 -> 0.16.2" aggregated-from = "https://raw.githubusercontent.com/mozilla/cargo-vet/main/supply-chain/audits.toml" +[[audits.mozilla.audits.thin-vec]] +who = "Aria Beingessner " +criteria = "safe-to-deploy" +version = "0.2.5" +notes = "I own this crate, and most of its versions were codeveloped and reviewed by Nika Layzell. This version was not explicitly reviewed by her, but it was specifically a release that made the code pass miri and was reviewed by me. Firefox uses it in the gecko-ffi configuration which is less thoroughly tested and more dangerous but we're reasonably confident in it. The real danger is from C++ code failing to use it correctly in FFI but that's just how FFI is." 
+aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" + +[[audits.mozilla.audits.thin-vec]] +who = "Mike Hommey " +criteria = "safe-to-deploy" +delta = "0.2.5 -> 0.2.7" +aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" + +[[audits.mozilla.audits.thin-vec]] +who = "Mike Hommey " +criteria = "safe-to-deploy" +delta = "0.2.7 -> 0.2.12" +aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" + +[[audits.mozilla.audits.thin-vec]] +who = "Emilio Cobos Álvarez " +criteria = "safe-to-deploy" +delta = "0.2.12 -> 0.2.14" +notes = "Minor API additions, trivial no-std support, and minor inlining tweaks." +aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" + +[[audits.mozilla.audits.thin-vec]] +who = "Emilio Cobos Álvarez " +criteria = "safe-to-deploy" +delta = "0.2.14 -> 0.2.16" +notes = "All patches either authored or reviewed by me." +aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml" + [[audits.mozilla.audits.unicode-linebreak]] who = "Jan-Erik Rediger " criteria = "safe-to-deploy"