diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..1ae6d56 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,82 @@ +name: CI + +on: + push: + branches: [main, "claude/**"] + pull_request: + branches: [main] + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Check + run: cargo check --workspace --all-targets + + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - name: Check formatting + run: cargo fmt --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + - uses: Swatinem/rust-cache@v2 + - name: Clippy + run: cargo clippy --workspace --all-targets -- -D warnings + + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Run tests + run: cargo test --workspace + + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Build release + run: cargo build --workspace --release + - name: Upload CLI artifact + uses: actions/upload-artifact@v4 + with: + name: webfetch-cli-linux + path: target/release/webfetch + + doc: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Check documentation + run: cargo doc --workspace --no-deps + env: + RUSTDOCFLAGS: -D warnings diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..88f7c1c --- /dev/null +++ b/.gitignore @@ -0,0 
+1,23 @@ +# Build output +/target/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# OS +.DS_Store + +# Python +__pycache__/ +*.py[cod] +*.egg-info/ +.eggs/ +dist/ +build/ + +# Testing +*.profraw +coverage/ diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..9bd7181 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,2174 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "async-compression" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "compression-codecs" +version = "0.4.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +dependencies = [ + "brotli", + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", 
+ "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", 
+ "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + 
"socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", 
+ "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.83" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "portable-atomic" +version = "1.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + 
"quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror 2.0.17", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.17", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.60.2", +] + +[[package]] +name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower", + 
"tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustls" +version = "0.23.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + 
"form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.114" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" 
+version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6d24790a10a7af737693a3e8f1d03faef7e6ca0cc99aae5066f533766de545" +dependencies = [ + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "async-compression", + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "iri-string", + "pin-project-lite", + "tokio", + "tokio-util", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + 
"tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" 
+version = "0.4.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webfetch" +version = "0.1.0" +dependencies = [ + "bytes", + "futures", + 
"reqwest", + "schemars", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio", + "tokio-test", + "tracing", + "tracing-subscriber", + "url", + "wiremock", +] + +[[package]] +name = "webfetch-cli" +version = "0.1.0" +dependencies = [ + "clap", + "serde", + "serde_json", + "tokio", + "webfetch", +] + +[[package]] +name = "webfetch-python" +version = "0.1.0" +dependencies = [ + "pyo3", + "serde_json", + "tokio", + "webfetch", +] + +[[package]] +name = "webpki-roots" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + 
"windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64", + "deadpool", + "futures", + "http", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4340e8c --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,44 @@ +[workspace] +resolver = "2" +members = ["crates/*"] + +[workspace.package] +version = "0.1.0" +edition = "2021" +license = "MIT" +authors = ["Everruns"] +repository = "https://github.com/everruns/webfetch" +description = 
"AI-friendly webfetch tool, CLI, MCP server, and library" + +[workspace.dependencies] +# Async runtime +tokio = { version = "1", features = ["rt-multi-thread", "macros", "time", "sync"] } + +# HTTP client +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "gzip", "brotli", "deflate", "stream"] } + +# Serialization +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +# Schema generation +schemars = "0.8" + +# URL parsing +url = "2" + +# Logging +tracing = "0.1" + +# Error handling +thiserror = "1" + +# CLI +clap = { version = "4", features = ["derive"] } + +# Async streaming +futures = "0.3" +bytes = "1" + +# Testing +wiremock = "0.6" diff --git a/crates/webfetch-cli/Cargo.toml b/crates/webfetch-cli/Cargo.toml new file mode 100644 index 0000000..fe22ef7 --- /dev/null +++ b/crates/webfetch-cli/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "webfetch-cli" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "CLI for the WebFetch tool" + +[[bin]] +name = "webfetch" +path = "src/main.rs" + +[dependencies] +webfetch = { path = "../webfetch" } +tokio = { workspace = true } +clap = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } diff --git a/crates/webfetch-cli/src/main.rs b/crates/webfetch-cli/src/main.rs new file mode 100644 index 0000000..ff761b3 --- /dev/null +++ b/crates/webfetch-cli/src/main.rs @@ -0,0 +1,132 @@ +//! 
WebFetch CLI - Command-line interface for fetching web content + +mod mcp; + +use clap::{Parser, Subcommand}; +use webfetch::{HttpMethod, Tool, WebFetchRequest, TOOL_LLMTXT}; + +/// WebFetch - AI-friendly web content fetching tool +#[derive(Parser, Debug)] +#[command(name = "webfetch")] +#[command(author, version, about, long_about = None)] +struct Cli { + #[command(subcommand)] + command: Option, + + /// URL to fetch + #[arg(long)] + url: Option, + + /// HTTP method (GET or HEAD) + #[arg(long, default_value = "GET")] + method: String, + + /// Convert HTML to markdown + #[arg(long)] + as_markdown: bool, + + /// Convert HTML to plain text + #[arg(long)] + as_text: bool, + + /// Custom User-Agent + #[arg(long)] + user_agent: Option, + + /// Print full help with examples (llmtxt) + #[arg(long)] + llmtxt: bool, +} + +#[derive(Subcommand, Debug)] +enum Commands { + /// Run as MCP (Model Context Protocol) server over stdio + Mcp, +} + +#[tokio::main] +async fn main() { + let cli = Cli::parse(); + + // Handle --llmtxt flag + if cli.llmtxt { + println!("{}", TOOL_LLMTXT); + std::process::exit(0); + } + + match cli.command { + Some(Commands::Mcp) => { + mcp::run_server().await; + } + None => { + // Fetch mode + if let Some(url) = cli.url { + run_fetch( + &url, + &cli.method, + cli.as_markdown, + cli.as_text, + cli.user_agent, + ) + .await; + } else { + eprintln!("Error: Missing required parameter: url"); + eprintln!("Usage: webfetch --url "); + eprintln!(" or: webfetch mcp"); + std::process::exit(1); + } + } + } +} + +async fn run_fetch( + url: &str, + method: &str, + as_markdown: bool, + as_text: bool, + user_agent: Option, +) { + // Parse method + let method = match method.to_uppercase().as_str() { + "GET" => HttpMethod::Get, + "HEAD" => HttpMethod::Head, + _ => { + eprintln!("Error: Invalid method: must be GET or HEAD"); + std::process::exit(1); + } + }; + + // Build request + let mut request = WebFetchRequest::new(url).method(method); + + if as_markdown { + request = 
request.as_markdown(); + } + if as_text { + request = request.as_text(); + } + + // Build tool + let mut builder = Tool::builder().enable_markdown(true).enable_text(true); + + if let Some(ua) = user_agent { + builder = builder.user_agent(ua); + } + + let tool = builder.build(); + + // Execute request + match tool.execute(request).await { + Ok(response) => { + let json = serde_json::to_string_pretty(&response).unwrap_or_else(|e| { + eprintln!("Error serializing response: {}", e); + std::process::exit(1); + }); + println!("{}", json); + } + Err(e) => { + eprintln!("Error: {}", e); + std::process::exit(1); + } + } +} diff --git a/crates/webfetch-cli/src/mcp.rs b/crates/webfetch-cli/src/mcp.rs new file mode 100644 index 0000000..331ba38 --- /dev/null +++ b/crates/webfetch-cli/src/mcp.rs @@ -0,0 +1,212 @@ +//! MCP (Model Context Protocol) server implementation + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use std::io::{self, BufRead, Write}; +use webfetch::{Tool, WebFetchRequest, TOOL_DESCRIPTION}; + +/// JSON-RPC 2.0 request +#[derive(Debug, Deserialize)] +#[allow(dead_code)] +struct JsonRpcRequest { + jsonrpc: String, + id: Option, + method: String, + #[serde(default)] + params: Value, +} + +/// JSON-RPC 2.0 response +#[derive(Debug, Serialize)] +struct JsonRpcResponse { + jsonrpc: &'static str, + #[serde(skip_serializing_if = "Option::is_none")] + id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + result: Option, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, +} + +/// JSON-RPC 2.0 error +#[derive(Debug, Serialize)] +struct JsonRpcError { + code: i32, + message: String, + #[serde(skip_serializing_if = "Option::is_none")] + data: Option, +} + +impl JsonRpcResponse { + fn success(id: Option, result: Value) -> Self { + Self { + jsonrpc: "2.0", + id, + result: Some(result), + error: None, + } + } + + fn error(id: Option, code: i32, message: impl Into) -> Self { + Self { + jsonrpc: "2.0", + id, + result: 
None, + error: Some(JsonRpcError { + code, + message: message.into(), + data: None, + }), + } + } +} + +/// MCP Server implementation +struct McpServer { + tool: Tool, +} + +impl McpServer { + fn new() -> Self { + Self { + tool: Tool::default(), + } + } + + async fn handle_request(&self, request: JsonRpcRequest) -> JsonRpcResponse { + match request.method.as_str() { + "initialize" => self.handle_initialize(request.id), + "tools/list" => self.handle_tools_list(request.id), + "tools/call" => self.handle_tools_call(request.id, request.params).await, + "notifications/initialized" => { + // This is a notification, no response needed + JsonRpcResponse::success(request.id, json!(null)) + } + _ => JsonRpcResponse::error( + request.id, + -32601, + format!("Method not found: {}", request.method), + ), + } + } + + fn handle_initialize(&self, id: Option) -> JsonRpcResponse { + JsonRpcResponse::success( + id, + json!({ + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {} + }, + "serverInfo": { + "name": "webfetch", + "version": env!("CARGO_PKG_VERSION") + } + }), + ) + } + + fn handle_tools_list(&self, id: Option) -> JsonRpcResponse { + let input_schema = self.tool.input_schema(); + + JsonRpcResponse::success( + id, + json!({ + "tools": [{ + "name": "webfetch", + "description": TOOL_DESCRIPTION, + "inputSchema": input_schema + }] + }), + ) + } + + async fn handle_tools_call(&self, id: Option, params: Value) -> JsonRpcResponse { + let tool_name = params + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + + if tool_name != "webfetch" { + return JsonRpcResponse::error(id, -32602, format!("Unknown tool: {}", tool_name)); + } + + let arguments = params.get("arguments").cloned().unwrap_or(json!({})); + + // Parse request + let request: WebFetchRequest = match serde_json::from_value(arguments) { + Ok(req) => req, + Err(e) => { + return JsonRpcResponse::error(id, -32602, format!("Invalid arguments: {}", e)); + } + }; + + // Execute tool + match 
self.tool.execute(request).await { + Ok(response) => { + let content = serde_json::to_value(&response).unwrap_or(json!({})); + JsonRpcResponse::success( + id, + json!({ + "content": [{ + "type": "text", + "text": serde_json::to_string_pretty(&content).unwrap_or_default() + }] + }), + ) + } + Err(e) => JsonRpcResponse::success( + id, + json!({ + "content": [{ + "type": "text", + "text": format!("Error: {}", e) + }], + "isError": true + }), + ), + } + } +} + +/// Run the MCP server over stdio +pub async fn run_server() { + let server = McpServer::new(); + let stdin = io::stdin(); + let mut stdout = io::stdout(); + + for line in stdin.lock().lines() { + let line = match line { + Ok(l) => l, + Err(e) => { + eprintln!("Error reading stdin: {}", e); + continue; + } + }; + + if line.is_empty() { + continue; + } + + let request: JsonRpcRequest = match serde_json::from_str(&line) { + Ok(req) => req, + Err(e) => { + let response = JsonRpcResponse::error(None, -32700, format!("Parse error: {}", e)); + let json = serde_json::to_string(&response).unwrap_or_default(); + let _ = writeln!(stdout, "{}", json); + let _ = stdout.flush(); + continue; + } + }; + + // Skip notifications (no id) + if request.id.is_none() && request.method.starts_with("notifications/") { + continue; + } + + let response = server.handle_request(request).await; + let json = serde_json::to_string(&response).unwrap_or_default(); + let _ = writeln!(stdout, "{}", json); + let _ = stdout.flush(); + } +} diff --git a/crates/webfetch-python/Cargo.toml b/crates/webfetch-python/Cargo.toml new file mode 100644 index 0000000..b1f563a --- /dev/null +++ b/crates/webfetch-python/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "webfetch-python" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "Python bindings for the WebFetch library" + +[lib] +name = "webfetch_py" +crate-type = ["cdylib"] + +[dependencies] +webfetch = { path 
= "../webfetch" } +pyo3 = { version = "0.22", features = ["extension-module"] } +tokio = { workspace = true } +serde_json = { workspace = true } diff --git a/crates/webfetch-python/src/lib.rs b/crates/webfetch-python/src/lib.rs new file mode 100644 index 0000000..f6cfafa --- /dev/null +++ b/crates/webfetch-python/src/lib.rs @@ -0,0 +1,281 @@ +//! Python bindings for WebFetch +//! +//! This module exposes the WebFetch tool contract to Python. + +// Allow false positive clippy warning from pyo3 macro expansion +#![allow(clippy::useless_conversion)] + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use webfetch::{FetchError, HttpMethod, Tool, ToolBuilder, WebFetchRequest, WebFetchResponse}; + +/// Convert FetchError to PyErr +fn to_py_err(e: FetchError) -> PyErr { + PyValueError::new_err(e.to_string()) +} + +/// Python wrapper for WebFetchRequest +#[pyclass(name = "WebFetchRequest")] +#[derive(Clone)] +pub struct PyWebFetchRequest { + inner: WebFetchRequest, +} + +#[pymethods] +impl PyWebFetchRequest { + /// Create a new request + #[new] + #[pyo3(signature = (url, method=None, as_markdown=None, as_text=None))] + fn new( + url: String, + method: Option, + as_markdown: Option, + as_text: Option, + ) -> PyResult { + let mut req = WebFetchRequest::new(url); + + if let Some(m) = method { + req.method = Some(m.parse::().map_err(PyValueError::new_err)?); + } + + req.as_markdown = as_markdown; + req.as_text = as_text; + + Ok(Self { inner: req }) + } + + /// Get URL + #[getter] + fn url(&self) -> &str { + &self.inner.url + } + + /// Get method + #[getter] + fn method(&self) -> Option { + self.inner.method.map(|m| m.to_string()) + } + + /// Get as_markdown flag + #[getter] + fn as_markdown(&self) -> Option { + self.inner.as_markdown + } + + /// Get as_text flag + #[getter] + fn as_text(&self) -> Option { + self.inner.as_text + } + + /// Convert to JSON string + fn to_json(&self) -> PyResult { + serde_json::to_string(&self.inner).map_err(|e| 
PyValueError::new_err(e.to_string())) + } + + /// Create from JSON string + #[staticmethod] + fn from_json(json: &str) -> PyResult { + let inner: WebFetchRequest = + serde_json::from_str(json).map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(Self { inner }) + } +} + +/// Python wrapper for WebFetchResponse +#[pyclass(name = "WebFetchResponse")] +#[derive(Clone)] +pub struct PyWebFetchResponse { + inner: WebFetchResponse, +} + +#[pymethods] +impl PyWebFetchResponse { + #[getter] + fn url(&self) -> &str { + &self.inner.url + } + + #[getter] + fn status_code(&self) -> u16 { + self.inner.status_code + } + + #[getter] + fn content_type(&self) -> Option<&str> { + self.inner.content_type.as_deref() + } + + #[getter] + fn size(&self) -> Option { + self.inner.size + } + + #[getter] + fn last_modified(&self) -> Option<&str> { + self.inner.last_modified.as_deref() + } + + #[getter] + fn filename(&self) -> Option<&str> { + self.inner.filename.as_deref() + } + + #[getter] + fn format(&self) -> Option<&str> { + self.inner.format.as_deref() + } + + #[getter] + fn content(&self) -> Option<&str> { + self.inner.content.as_deref() + } + + #[getter] + fn truncated(&self) -> Option { + self.inner.truncated + } + + #[getter] + fn method(&self) -> Option<&str> { + self.inner.method.as_deref() + } + + #[getter] + fn error(&self) -> Option<&str> { + self.inner.error.as_deref() + } + + /// Convert to JSON string + fn to_json(&self) -> PyResult { + serde_json::to_string(&self.inner).map_err(|e| PyValueError::new_err(e.to_string())) + } + + fn __repr__(&self) -> String { + format!( + "WebFetchResponse(url={:?}, status_code={})", + self.inner.url, self.inner.status_code + ) + } +} + +/// Python wrapper for WebFetch Tool +#[pyclass(name = "WebFetchTool")] +pub struct PyWebFetchTool { + inner: Tool, + runtime: tokio::runtime::Runtime, +} + +#[pymethods] +impl PyWebFetchTool { + /// Create a new tool with default options + #[new] + #[pyo3(signature = (enable_markdown=true, 
enable_text=true, user_agent=None, allow_prefixes=None, block_prefixes=None))] + fn new( + enable_markdown: bool, + enable_text: bool, + user_agent: Option, + allow_prefixes: Option>, + block_prefixes: Option>, + ) -> PyResult { + let mut builder = ToolBuilder::new() + .enable_markdown(enable_markdown) + .enable_text(enable_text); + + if let Some(ua) = user_agent { + builder = builder.user_agent(ua); + } + + if let Some(prefixes) = allow_prefixes { + for prefix in prefixes { + builder = builder.allow_prefix(prefix); + } + } + + if let Some(prefixes) = block_prefixes { + for prefix in prefixes { + builder = builder.block_prefix(prefix); + } + } + + let runtime = tokio::runtime::Runtime::new() + .map_err(|e| PyValueError::new_err(format!("Failed to create runtime: {}", e)))?; + + Ok(Self { + inner: builder.build(), + runtime, + }) + } + + /// Get tool description + fn description(&self) -> &'static str { + self.inner.description() + } + + /// Get system prompt + fn system_prompt(&self) -> &'static str { + self.inner.system_prompt() + } + + /// Get full documentation (llmtxt) + fn llmtxt(&self) -> &'static str { + self.inner.llmtxt() + } + + /// Get input schema as JSON string + fn input_schema(&self) -> PyResult { + serde_json::to_string(&self.inner.input_schema()) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + /// Get output schema as JSON string + fn output_schema(&self) -> PyResult { + serde_json::to_string(&self.inner.output_schema()) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + /// Execute a fetch request + fn execute(&self, request: PyWebFetchRequest) -> PyResult { + let result = self.runtime.block_on(self.inner.execute(request.inner)); + match result { + Ok(response) => Ok(PyWebFetchResponse { inner: response }), + Err(e) => Err(to_py_err(e)), + } + } + + /// Fetch a URL directly (convenience method) + #[pyo3(signature = (url, method=None, as_markdown=None, as_text=None))] + fn fetch( + &self, + url: String, + method: Option, + 
as_markdown: Option, + as_text: Option, + ) -> PyResult { + let request = PyWebFetchRequest::new(url, method, as_markdown, as_text)?; + self.execute(request) + } +} + +/// Fetch a URL using default options (convenience function) +#[pyfunction] +#[pyo3(signature = (url, method=None, as_markdown=None, as_text=None))] +fn fetch( + url: String, + method: Option, + as_markdown: Option, + as_text: Option, +) -> PyResult { + let tool = PyWebFetchTool::new(true, true, None, None, None)?; + tool.fetch(url, method, as_markdown, as_text) +} + +/// Python module definition +#[pymodule] +fn webfetch_py(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(fetch, m)?)?; + Ok(()) +} diff --git a/crates/webfetch/Cargo.toml b/crates/webfetch/Cargo.toml new file mode 100644 index 0000000..6e04c77 --- /dev/null +++ b/crates/webfetch/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "webfetch" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "AI-friendly webfetch library for fetching and converting web content" + +[dependencies] +tokio = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +schemars = { workspace = true } +url = { workspace = true } +tracing = { workspace = true } +thiserror = { workspace = true } +futures = { workspace = true } +bytes = { workspace = true } + +[dev-dependencies] +wiremock = { workspace = true } +tokio-test = "0.4" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/crates/webfetch/src/client.rs b/crates/webfetch/src/client.rs new file mode 100644 index 0000000..ae11338 --- /dev/null +++ b/crates/webfetch/src/client.rs @@ -0,0 +1,381 @@ +//! 
HTTP client for WebFetch + +use crate::convert::{filter_excessive_newlines, html_to_markdown, html_to_text, is_html}; +use crate::error::FetchError; +use crate::types::{HttpMethod, WebFetchRequest, WebFetchResponse}; +use crate::DEFAULT_USER_AGENT; +use bytes::Bytes; +use futures::StreamExt; +use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, CONTENT_DISPOSITION, USER_AGENT}; +use std::time::Duration; +use tracing::{error, warn}; + +/// Binary content type prefixes +const BINARY_PREFIXES: &[&str] = &[ + "image/", + "audio/", + "video/", + "application/octet-stream", + "application/pdf", + "application/zip", + "application/gzip", + "application/x-tar", + "application/x-rar", + "application/x-7z", + "application/vnd.ms-", + "application/vnd.openxmlformats", + "font/", +]; + +/// First-byte timeout (connect + first response byte) +const FIRST_BYTE_TIMEOUT: Duration = Duration::from_secs(1); + +/// Body timeout (total) +const BODY_TIMEOUT: Duration = Duration::from_secs(30); + +/// Timeout message appended to truncated content +const TIMEOUT_MESSAGE: &str = "\n\n[..more content timed out...]"; + +/// Fetch options that can be configured via tool builder +#[derive(Debug, Clone, Default)] +pub struct FetchOptions { + /// Custom User-Agent + pub user_agent: Option, + /// Allow list of URL prefixes + pub allow_prefixes: Vec, + /// Block list of URL prefixes + pub block_prefixes: Vec, + /// Enable as_markdown option + pub enable_markdown: bool, + /// Enable as_text option + pub enable_text: bool, +} + +/// Fetch a URL and return the response +pub async fn fetch(req: WebFetchRequest) -> Result { + fetch_with_options(req, FetchOptions::default()).await +} + +/// Fetch a URL with custom options +pub async fn fetch_with_options( + req: WebFetchRequest, + options: FetchOptions, +) -> Result { + // Validate URL + if req.url.is_empty() { + return Err(FetchError::MissingUrl); + } + + if !req.url.starts_with("http://") && !req.url.starts_with("https://") { + return 
Err(FetchError::InvalidUrlScheme); + } + + // Check allow/block lists + if !options.allow_prefixes.is_empty() { + let allowed = options + .allow_prefixes + .iter() + .any(|prefix| req.url.starts_with(prefix)); + if !allowed { + return Err(FetchError::BlockedUrl); + } + } + + if options + .block_prefixes + .iter() + .any(|prefix| req.url.starts_with(prefix)) + { + return Err(FetchError::BlockedUrl); + } + + let method = req.effective_method(); + let wants_markdown = options.enable_markdown && req.wants_markdown(); + let wants_text = options.enable_text && req.wants_text(); + + // Build headers + let mut headers = HeaderMap::new(); + let user_agent = options.user_agent.as_deref().unwrap_or(DEFAULT_USER_AGENT); + headers.insert( + USER_AGENT, + HeaderValue::from_str(user_agent) + .unwrap_or_else(|_| HeaderValue::from_static(DEFAULT_USER_AGENT)), + ); + + // Set Accept header based on conversion mode + let accept = if wants_markdown { + "text/html, text/markdown, text/plain, */*;q=0.8" + } else if wants_text { + "text/html, text/plain, */*;q=0.8" + } else { + "*/*" + }; + headers.insert(ACCEPT, HeaderValue::from_static(accept)); + + // Build client + let client = reqwest::Client::builder() + .default_headers(headers) + .connect_timeout(FIRST_BYTE_TIMEOUT) + .timeout(FIRST_BYTE_TIMEOUT) // Initial timeout for first byte + .build() + .map_err(FetchError::ClientBuildError)?; + + // Build request + let reqwest_method = match method { + HttpMethod::Get => reqwest::Method::GET, + HttpMethod::Head => reqwest::Method::HEAD, + }; + + let request = client.request(reqwest_method.clone(), &req.url); + + // Send request + let response = request.send().await.map_err(FetchError::from_reqwest)?; + + let status_code = response.status().as_u16(); + let headers = response.headers().clone(); + + // Extract metadata + let content_type = headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + let last_modified = headers + .get("last-modified") + 
.and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + let content_length: Option = headers + .get("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse().ok()); + + let filename = extract_filename(&headers, &req.url); + + // Handle HEAD request + if method == HttpMethod::Head { + return Ok(WebFetchResponse { + url: req.url, + status_code, + content_type, + size: content_length, + last_modified, + filename, + method: Some("HEAD".to_string()), + ..Default::default() + }); + } + + // Check for binary content + if let Some(ref ct) = content_type { + if is_binary_content_type(ct) { + return Ok(WebFetchResponse { + url: req.url, + status_code, + content_type, + size: content_length, + last_modified, + filename, + error: Some( + "Binary content is not supported. Only textual content (HTML, text, JSON, etc.) can be fetched." + .to_string(), + ), + ..Default::default() + }); + } + } + + // Read body with timeout + let (body, truncated) = read_body_with_timeout(response, BODY_TIMEOUT).await; + let size = body.len() as u64; + + // Convert to string + let content = String::from_utf8_lossy(&body).to_string(); + + // Determine format and convert if needed + let (format, final_content) = if is_html(&content_type, &content) { + if wants_markdown { + ("markdown".to_string(), html_to_markdown(&content)) + } else if wants_text { + ("text".to_string(), html_to_text(&content)) + } else { + ("raw".to_string(), content) + } + } else { + ("raw".to_string(), content) + }; + + // Apply newline filtering + let mut final_content = filter_excessive_newlines(&final_content); + + // Add timeout message if truncated + if truncated { + final_content.push_str(TIMEOUT_MESSAGE); + } + + Ok(WebFetchResponse { + url: req.url, + status_code, + content_type, + size: Some(size), + last_modified, + filename, + format: Some(format), + content: Some(final_content), + truncated: if truncated { Some(true) } else { None }, + ..Default::default() + }) +} + +/// Check if content type 
/// Parse the `filename` parameter from a Content-Disposition header value.
///
/// Accepts both the quoted (`filename="report.pdf"`) and the unquoted
/// (`filename=report.pdf`) form. The parameter name is matched
/// case-insensitively, so `Filename=` and `FILENAME=` are recognised too.
/// The extended `filename*=` form is not handled.
///
/// Returns `None` when no `filename` parameter is present or when its value
/// is empty.
fn parse_content_disposition_filename(value: &str) -> Option<String> {
    // ASCII lowercasing keeps every byte offset identical to `value`, so
    // positions found in `lowered` can be used to slice the original (which
    // preserves the filename's own casing).
    let lowered = value.to_ascii_lowercase();

    // Try the quoted form first, then fall back to the unquoted form.
    for pattern in ["filename=\"", "filename="] {
        if let Some(start) = lowered.find(pattern) {
            let rest = &value[start + pattern.len()..];
            if pattern.ends_with('"') {
                // Quoted: take everything up to the closing quote.
                if let Some(end) = rest.find('"') {
                    let filename = &rest[..end];
                    if !filename.is_empty() {
                        return Some(filename.to_string());
                    }
                }
            } else {
                // Unquoted - take until space or semicolon
                let end = rest
                    .find(|c: char| c.is_whitespace() || c == ';')
                    .unwrap_or(rest.len());
                let filename = rest[..end].trim_matches('"');
                if !filename.is_empty() {
                    return Some(filename.to_string());
                }
            }
        }
    }
    None
}
{ + chunk = chunk_future => { + match chunk { + Some(Ok(bytes)) => { + body.extend_from_slice(&bytes); + } + Some(Err(e)) => { + error!("Error reading body chunk: {}", e); + // Return partial content on error + let has_content = !body.is_empty(); + return (Bytes::from(body), has_content); + } + None => { + // Stream complete + return (Bytes::from(body), false); + } + } + } + _ = timeout_future => { + warn!("Body timeout reached, returning partial content"); + return (Bytes::from(body), true); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_binary_content_type() { + assert!(is_binary_content_type("image/png")); + assert!(is_binary_content_type("image/jpeg")); + assert!(is_binary_content_type("audio/mp3")); + assert!(is_binary_content_type("video/mp4")); + assert!(is_binary_content_type("application/pdf")); + assert!(is_binary_content_type("application/octet-stream")); + assert!(is_binary_content_type("application/zip")); + assert!(is_binary_content_type("application/vnd.ms-excel")); + assert!(is_binary_content_type( + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + )); + assert!(is_binary_content_type("font/woff2")); + + assert!(!is_binary_content_type("text/html")); + assert!(!is_binary_content_type("text/plain")); + assert!(!is_binary_content_type("application/json")); + assert!(!is_binary_content_type("application/javascript")); + } + + #[test] + fn test_parse_content_disposition_filename() { + assert_eq!( + parse_content_disposition_filename("attachment; filename=\"file.pdf\""), + Some("file.pdf".to_string()) + ); + assert_eq!( + parse_content_disposition_filename("attachment; filename=file.pdf"), + Some("file.pdf".to_string()) + ); + assert_eq!( + parse_content_disposition_filename("inline; filename=\"report.xlsx\"; size=1234"), + Some("report.xlsx".to_string()) + ); + assert_eq!(parse_content_disposition_filename("inline"), None); + } + + #[test] + fn test_extract_filename_from_url() { + let headers = 
/// Read the remainder of a tag after its opening `<`, consuming through the
/// closing `>` (or to end of input if there is none).
///
/// Returns `(raw, name, is_closing)`:
/// - `raw` is the tag text exactly as written, without the angle brackets;
/// - `name` is the lowercased element name with a leading `/` (closing tag)
///   and a trailing `/` (self-closing tag written without a space, e.g.
///   `<br/>`) removed;
/// - `is_closing` is true for `</name>`.
fn read_tag(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> (String, String, bool) {
    let mut raw = String::new();
    for next in chars.by_ref() {
        if next == '>' {
            break;
        }
        raw.push(next);
    }

    let lower = raw.to_lowercase();
    let is_closing = lower.starts_with('/');
    let name = lower
        .strip_prefix('/')
        .unwrap_or(&lower)
        .split_whitespace()
        .next()
        .unwrap_or("")
        .trim_end_matches('/')
        .to_string();

    (raw, name, is_closing)
}

/// Maintain the stack of currently open "skip" elements (elements whose
/// content is dropped from the output).
///
/// Returns `true` when `name` is a skip element, in which case the caller
/// must not process the tag any further.
fn track_skipped_element(stack: &mut Vec<String>, raw: &str, name: &str, is_closing: bool) -> bool {
    const SKIP_TAGS: [&str; 5] = ["script", "style", "noscript", "iframe", "svg"];

    if !SKIP_TAGS.contains(&name) {
        return false;
    }

    if is_closing {
        // Close the innermost matching element; stray closers are ignored.
        if let Some(pos) = stack.iter().rposition(|t| t == name) {
            stack.remove(pos);
        }
    } else if !raw.ends_with('/') {
        // Self-closing skip elements have no content to skip.
        stack.push(name.to_string());
    }
    true
}

/// Convert HTML to markdown.
///
/// A single-pass, tag-by-tag conversion (not a full HTML parser):
/// - `h1`..`h6` become `#`-prefixed headings;
/// - block elements (`p`, `div`, `section`, ...) end with a blank line;
/// - `br` becomes a newline and `hr` becomes `---`;
/// - `li` becomes a `- ` item;
/// - `strong`/`b` become `**`, `em`/`i` become `*`;
/// - `pre` becomes a fenced block and `code` outside `pre` becomes a backtick;
/// - `blockquote` lines are prefixed with `> `;
/// - `<a href="URL">text</a>` becomes `[text](URL)`; anchors without an
///   `href` contribute only their text;
/// - content of `script`, `style`, `noscript`, `iframe` and `svg` is dropped.
///
/// Entities are decoded with `decode_entity` and the result is normalised
/// with `clean_whitespace`.
pub fn html_to_markdown(html: &str) -> String {
    let mut output = String::new();
    let mut skip_elements: Vec<String> = Vec::new();
    // One entry per open <a>; `None` for anchors without an href.
    let mut link_targets: Vec<Option<String>> = Vec::new();
    let mut list_depth: usize = 0;
    let mut in_pre = false;
    let mut in_blockquote = false;

    let mut chars = html.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '<' {
            let (tag, tag_name, is_closing) = read_tag(&mut chars);

            if track_skipped_element(&mut skip_elements, &tag, &tag_name, is_closing) {
                continue;
            }
            if !skip_elements.is_empty() {
                continue;
            }

            // Handle markdown conversion
            match tag_name.as_str() {
                "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
                    if is_closing {
                        output.push_str("\n\n");
                    } else {
                        // The heading level is the digit after the 'h'.
                        let level = usize::from(tag_name.as_bytes()[1] - b'0');
                        output.push('\n');
                        output.push_str(&"#".repeat(level));
                        output.push(' ');
                    }
                }
                "p" | "div" | "section" | "article" | "main" | "header" | "footer" => {
                    if is_closing {
                        output.push_str("\n\n");
                    }
                }
                "br" => {
                    output.push('\n');
                }
                "hr" => {
                    output.push_str("\n---\n");
                }
                "ul" | "ol" => {
                    if is_closing {
                        list_depth = list_depth.saturating_sub(1);
                        if list_depth == 0 {
                            output.push('\n');
                        }
                    } else {
                        list_depth += 1;
                    }
                }
                "li" => {
                    if !is_closing {
                        output.push('\n');
                        // NOTE: clean_whitespace drops whitespace that directly
                        // follows a newline, so this indent does not survive
                        // into the final output.
                        for _ in 0..list_depth.saturating_sub(1) {
                            output.push_str("  ");
                        }
                        output.push_str("- ");
                    }
                }
                "strong" | "b" => {
                    output.push_str("**");
                }
                "em" | "i" => {
                    output.push('*');
                }
                "pre" => {
                    output.push_str("\n```\n");
                    in_pre = !is_closing;
                }
                "code" => {
                    if !in_pre {
                        output.push('`');
                    }
                }
                "blockquote" => {
                    if !is_closing {
                        in_blockquote = true;
                        output.push_str("\n> ");
                    } else {
                        in_blockquote = false;
                        output.push('\n');
                    }
                }
                "a" => {
                    if !is_closing {
                        // Open the link now; the target is emitted when the
                        // anchor closes so that it follows the link text.
                        let href = extract_attribute(&tag, "href");
                        if href.is_some() {
                            output.push('[');
                        }
                        link_targets.push(href);
                    } else if let Some(Some(href)) = link_targets.pop() {
                        output.push_str("](");
                        output.push_str(&href);
                        output.push(')');
                    }
                }
                _ => {}
            }
        } else if skip_elements.is_empty() {
            // Text content
            let decoded = decode_entity(c, &mut chars);
            if in_blockquote && decoded == '\n' {
                output.push_str("\n> ");
            } else {
                output.push(decoded);
            }
        }
    }

    clean_whitespace(&output)
}

/// Convert HTML to plain text.
///
/// Tags are removed; the block-level tags listed in `NEWLINE_TAGS` produce
/// line breaks (including `<br/>` written without a space). Content of
/// `script`, `style`, `noscript`, `iframe` and `svg` is dropped. Entities are
/// decoded with `decode_entity` and the result is normalised with
/// `clean_whitespace`.
pub fn html_to_text(html: &str) -> String {
    const NEWLINE_TAGS: [&str; 11] = [
        "p", "div", "br", "h1", "h2", "h3", "h4", "h5", "h6", "li", "tr",
    ];

    let mut output = String::new();
    let mut skip_elements: Vec<String> = Vec::new();

    let mut chars = html.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '<' {
            let (tag, tag_name, is_closing) = read_tag(&mut chars);

            if track_skipped_element(&mut skip_elements, &tag, &tag_name, is_closing) {
                continue;
            }
            if !skip_elements.is_empty() {
                continue;
            }

            // A newline is produced by: any closing newline tag, `br` in any
            // form, and the opening tag of headings and paragraphs.
            let name = tag_name.as_str();
            if NEWLINE_TAGS.contains(&name)
                && (is_closing
                    || name == "br"
                    || matches!(name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "p"))
            {
                output.push('\n');
            }
        } else if skip_elements.is_empty() {
            // Text content
            output.push(decode_entity(c, &mut chars));
        }
    }

    clean_whitespace(&output)
}

/// Extract an attribute value from the inside of a tag (the text between
/// `<` and `>`).
///
/// The attribute name is matched ASCII-case-insensitively and only at an
/// attribute boundary, so asking for `href` does not match `data-href`.
/// Supports double-quoted, single-quoted and unquoted values. Returns `None`
/// if the attribute is absent or a quoted value is never closed.
fn extract_attribute(tag: &str, attr: &str) -> Option<String> {
    let pattern = format!("{}=", attr.to_ascii_lowercase());
    // ASCII lowercasing keeps byte offsets aligned with `tag`; full Unicode
    // lowercasing can change byte lengths and misalign the slice below.
    let tag_lower = tag.to_ascii_lowercase();
    let bytes = tag_lower.as_bytes();

    let start = tag_lower
        .match_indices(pattern.as_str())
        .map(|(index, _)| index)
        .find(|&index| {
            index == 0 || {
                let prev = bytes[index - 1];
                // Reject matches that are the tail of a longer attribute name.
                !(prev.is_ascii_alphanumeric() || matches!(prev, b'-' | b'_' | b':'))
            }
        })?;

    let rest = tag[start + pattern.len()..].trim_start();

    if let Some(quoted) = rest.strip_prefix('"') {
        quoted.find('"').map(|end| quoted[..end].to_string())
    } else if let Some(quoted) = rest.strip_prefix('\'') {
        quoted.find('\'').map(|end| quoted[..end].to_string())
    } else {
        let end = rest
            .find(|c: char| c.is_whitespace() || c == '>')
            .unwrap_or(rest.len());
        Some(rest[..end].to_string())
    }
}

/// Decode an HTML entity whose leading `&` has just been read as `c`.
///
/// If `c` is not `&` it is returned unchanged. Otherwise the text after the
/// ampersand is examined on a copy of the iterator; only when it forms a
/// recognised named entity or a valid numeric reference (`&#65;`, `&#x41;`)
/// is it consumed from `chars` and the decoded character returned. In every
/// other case a literal `&` is returned and `chars` is left untouched, so the
/// following text (e.g. the `T` in `AT&T`) is not lost.
fn decode_entity(c: char, chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> char {
    if c != '&' {
        return c;
    }

    // Scan ahead without consuming from the real iterator.
    let mut lookahead = chars.clone();
    let mut entity = String::new();
    for next in lookahead.by_ref() {
        if next == ';' {
            break;
        }
        if next.is_whitespace() || entity.len() > 10 {
            // Not a valid entity
            return '&';
        }
        entity.push(next);
    }

    let decoded = match entity.as_str() {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some(' '),
        "mdash" => Some('—'),
        "ndash" => Some('–'),
        "copy" => Some('©'),
        "reg" => Some('®'),
        other => other.strip_prefix('#').and_then(|num| {
            // Numeric reference: hexadecimal with an x/X prefix, else decimal.
            let hex = num.strip_prefix('x').or_else(|| num.strip_prefix('X'));
            let code = match hex {
                Some(digits) => u32::from_str_radix(digits, 16),
                None => num.parse::<u32>(),
            };
            code.ok().and_then(char::from_u32)
        }),
    };

    match decoded {
        Some(ch) => {
            // Commit: skip past the entity in the caller's iterator.
            *chars = lookahead;
            ch
        }
        // Unknown entity - emit the ampersand and leave the rest in place.
        None => '&',
    }
}

/// Clean whitespace: collapse runs, trim, keep max 2 newlines.
///
/// - Runs of non-newline whitespace collapse to a single space.
/// - Spaces directly before or after a newline are removed.
/// - At most two consecutive newlines are kept, even when the newlines are
///   separated only by other whitespace (e.g. `"\n \n \n"`).
/// - Leading and trailing whitespace is trimmed.
pub fn clean_whitespace(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut last_was_space = false;
    let mut newline_count = 0;

    for c in s.chars() {
        if c == '\n' {
            // Remove trailing space before newline
            if last_was_space && result.ends_with(' ') {
                result.pop();
            }
            newline_count += 1;
            // Treat newline as space for next char collapsing
            last_was_space = true;
            if newline_count <= 2 {
                result.push(c);
            }
        } else if c.is_whitespace() {
            // Deliberately does not reset `newline_count`: whitespace between
            // newlines must not let a longer run of blank lines through.
            if !last_was_space {
                result.push(' ');
                last_was_space = true;
            }
        } else {
            newline_count = 0;
            last_was_space = false;
            result.push(c);
        }
    }

    result.trim().to_string()
}

/// Filter excessive newlines: keep at most 2 consecutive newlines.
///
/// All other characters, including other whitespace, are copied unchanged.
pub fn filter_excessive_newlines(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut newline_count = 0;

    for c in s.chars() {
        if c == '\n' {
            newline_count += 1;
            if newline_count <= 2 {
                result.push(c);
            }
        } else {
            newline_count = 0;
            result.push(c);
        }
    }

    result
}

Title

Subtitle

"; + let md = html_to_markdown(html); + assert!(md.contains("# Title")); + assert!(md.contains("## Subtitle")); + } + + #[test] + fn test_html_to_markdown_paragraphs() { + let html = "

First paragraph

Second paragraph

"; + let md = html_to_markdown(html); + assert!(md.contains("First paragraph")); + assert!(md.contains("Second paragraph")); + } + + #[test] + fn test_html_to_markdown_lists() { + let html = "
  • Item 1
  • Item 2
"; + let md = html_to_markdown(html); + assert!(md.contains("- Item 1")); + assert!(md.contains("- Item 2")); + } + + #[test] + fn test_html_to_markdown_emphasis() { + let html = "

bold and italic

"; + let md = html_to_markdown(html); + assert!(md.contains("**bold**")); + assert!(md.contains("*italic*")); + } + + #[test] + fn test_html_to_markdown_code() { + let html = "
code block
"; + let md = html_to_markdown(html); + assert!(md.contains("```")); + assert!(md.contains("code block")); + } + + #[test] + fn test_html_to_markdown_skip_script() { + let html = "

Before

After

"; + let md = html_to_markdown(html); + assert!(md.contains("Before")); + assert!(md.contains("After")); + assert!(!md.contains("alert")); + } + + #[test] + fn test_html_to_text_simple() { + let html = "

Hello

World

"; + let text = html_to_text(html); + assert!(text.contains("Hello")); + assert!(text.contains("World")); + } + + #[test] + fn test_html_to_text_skip_script() { + let html = "

Before

After

"; + let text = html_to_text(html); + assert!(text.contains("Before")); + assert!(text.contains("After")); + assert!(!text.contains("alert")); + } + + #[test] + fn test_entity_decoding() { + let html = "

& < > " '   — – © ®

"; + let text = html_to_text(html); + assert!(text.contains('&')); + assert!(text.contains('<')); + assert!(text.contains('>')); + assert!(text.contains('"')); + assert!(text.contains('\'')); + assert!(text.contains('—')); + assert!(text.contains('–')); + assert!(text.contains('©')); + assert!(text.contains('®')); + } + + #[test] + fn test_filter_excessive_newlines() { + let input = "line1\n\n\n\n\nline2"; + let output = filter_excessive_newlines(input); + assert_eq!(output, "line1\n\nline2"); + } + + #[test] + fn test_clean_whitespace() { + let input = " hello world \n\n\n\n test "; + let output = clean_whitespace(input); + assert_eq!(output, "hello world\n\ntest"); + } + + #[test] + fn test_extract_attribute() { + assert_eq!( + extract_attribute("a href=\"https://example.com\" class=\"link\"", "href"), + Some("https://example.com".to_string()) + ); + assert_eq!( + extract_attribute("img src='image.png'", "src"), + Some("image.png".to_string()) + ); + assert_eq!( + extract_attribute("div class=test", "class"), + Some("test".to_string()) + ); + } +} diff --git a/crates/webfetch/src/error.rs b/crates/webfetch/src/error.rs new file mode 100644 index 0000000..79cad94 --- /dev/null +++ b/crates/webfetch/src/error.rs @@ -0,0 +1,81 @@ +//! 
Error types for WebFetch + +use thiserror::Error; + +/// Errors that can occur during fetch operations +#[derive(Debug, Error)] +pub enum FetchError { + /// URL is missing + #[error("Missing required parameter: url")] + MissingUrl, + + /// URL has invalid scheme + #[error("Invalid URL: must start with http:// or https://")] + InvalidUrlScheme, + + /// Invalid HTTP method + #[error("Invalid method: must be GET or HEAD")] + InvalidMethod, + + /// URL is blocked by prefix list + #[error("Blocked URL: prefix not allowed")] + BlockedUrl, + + /// Failed to build HTTP client + #[error("Failed to create HTTP client")] + ClientBuildError(#[source] reqwest::Error), + + /// Request timed out waiting for first byte + #[error("Request timed out: server did not respond within 1 second")] + FirstByteTimeout, + + /// Failed to connect to server + #[error("Failed to connect to server")] + ConnectError(#[source] reqwest::Error), + + /// Other request error + #[error("Request failed: {0}")] + RequestError(String), +} + +impl FetchError { + /// Create an error from a reqwest error + pub fn from_reqwest(err: reqwest::Error) -> Self { + if err.is_timeout() { + FetchError::FirstByteTimeout + } else if err.is_connect() { + FetchError::ConnectError(err) + } else { + FetchError::RequestError(err.to_string()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_messages() { + assert_eq!( + FetchError::MissingUrl.to_string(), + "Missing required parameter: url" + ); + assert_eq!( + FetchError::InvalidUrlScheme.to_string(), + "Invalid URL: must start with http:// or https://" + ); + assert_eq!( + FetchError::InvalidMethod.to_string(), + "Invalid method: must be GET or HEAD" + ); + assert_eq!( + FetchError::BlockedUrl.to_string(), + "Blocked URL: prefix not allowed" + ); + assert_eq!( + FetchError::FirstByteTimeout.to_string(), + "Request timed out: server did not respond within 1 second" + ); + } +} diff --git a/crates/webfetch/src/lib.rs 
b/crates/webfetch/src/lib.rs new file mode 100644 index 0000000..b291b3b --- /dev/null +++ b/crates/webfetch/src/lib.rs @@ -0,0 +1,81 @@ +//! WebFetch - AI-friendly web content fetching library +//! +//! This crate provides a reusable library API for fetching web content, +//! with optional HTML to markdown/text conversion. + +mod client; +mod convert; +mod error; +mod tool; +mod types; + +pub use client::fetch; +pub use convert::{html_to_markdown, html_to_text}; +pub use error::FetchError; +pub use tool::{Tool, ToolBuilder, ToolStatus}; +pub use types::{HttpMethod, WebFetchRequest, WebFetchResponse}; + +/// Default User-Agent string +pub const DEFAULT_USER_AGENT: &str = "Everruns WebFetch/1.0"; + +/// Tool description for LLM consumption +pub const TOOL_DESCRIPTION: &str = r#"Fetches content from a URL and optionally converts HTML to markdown or text. + +- Supports GET and HEAD methods +- Converts HTML to markdown or plain text +- Returns metadata for binary content +- Strict timeouts for reliability"#; + +/// Extended documentation for LLM consumption (llmtxt) +pub const TOOL_LLMTXT: &str = r#"# WebFetch Tool + +Fetches content from a URL and optionally converts HTML to markdown or text. 
+ +## Capabilities +- HTTP GET and HEAD requests +- HTML to Markdown conversion +- HTML to plain text conversion +- Binary content detection (returns metadata only) +- Automatic timeout handling + +## Input Parameters +- `url` (required): The URL to fetch (must be http:// or https://) +- `method` (optional): GET or HEAD (default: GET) +- `as_markdown` (optional): Convert HTML to markdown +- `as_text` (optional): Convert HTML to plain text + +## Output Fields +- `url`: The fetched URL +- `status_code`: HTTP status code +- `content_type`: Content-Type header value +- `size`: Content size in bytes +- `last_modified`: Last-Modified header value +- `filename`: Extracted filename +- `format`: "markdown", "text", or "raw" +- `content`: The fetched/converted content +- `truncated`: True if content was truncated due to timeout +- `method`: "HEAD" for HEAD requests +- `error`: Error message for binary content + +## Examples + +### Fetch a webpage as markdown +```json +{"url": "https://example.com", "as_markdown": true} +``` + +### Check if a URL exists (HEAD request) +```json +{"url": "https://example.com/file.pdf", "method": "HEAD"} +``` + +### Fetch raw content +```json +{"url": "https://api.example.com/data.json"} +``` + +## Error Handling +- Invalid URLs return an error +- Binary content returns metadata with error message +- Timeouts return partial content with truncated flag +"#; diff --git a/crates/webfetch/src/tool.rs b/crates/webfetch/src/tool.rs new file mode 100644 index 0000000..2175617 --- /dev/null +++ b/crates/webfetch/src/tool.rs @@ -0,0 +1,308 @@ +//! 
Tool builder and contract for WebFetch + +use crate::client::{fetch_with_options, FetchOptions}; +use crate::error::FetchError; +use crate::types::{WebFetchRequest, WebFetchResponse}; +use crate::{TOOL_DESCRIPTION, TOOL_LLMTXT}; +use schemars::schema_for; +use serde::{Deserialize, Serialize}; + +/// Status update during tool execution +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolStatus { + /// Current phase (e.g., "validate", "connect", "fetch", "convert") + pub phase: String, + /// Optional message + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, + /// Estimated completion percentage (0-100) + #[serde(skip_serializing_if = "Option::is_none")] + pub percent_complete: Option, + /// Estimated time remaining in milliseconds + #[serde(skip_serializing_if = "Option::is_none")] + pub eta_ms: Option, +} + +impl ToolStatus { + /// Create a new status with phase + pub fn new(phase: impl Into) -> Self { + Self { + phase: phase.into(), + message: None, + percent_complete: None, + eta_ms: None, + } + } + + /// Set message + pub fn with_message(mut self, message: impl Into) -> Self { + self.message = Some(message.into()); + self + } + + /// Set completion percentage + pub fn with_percent(mut self, percent: f32) -> Self { + self.percent_complete = Some(percent); + self + } + + /// Set ETA + pub fn with_eta(mut self, eta_ms: u64) -> Self { + self.eta_ms = Some(eta_ms); + self + } +} + +/// Builder for configuring the WebFetch tool +#[derive(Debug, Clone, Default)] +pub struct ToolBuilder { + /// Enable as_markdown option + enable_markdown: bool, + /// Enable as_text option + enable_text: bool, + /// Custom User-Agent + user_agent: Option, + /// Allow list of URL prefixes + allow_prefixes: Vec, + /// Block list of URL prefixes + block_prefixes: Vec, +} + +impl ToolBuilder { + /// Create a new tool builder with all options enabled + pub fn new() -> Self { + Self { + enable_markdown: true, + enable_text: true, + ..Default::default() + } 
+ } + + /// Enable as_markdown option + pub fn enable_markdown(mut self, enable: bool) -> Self { + self.enable_markdown = enable; + self + } + + /// Enable as_text option + pub fn enable_text(mut self, enable: bool) -> Self { + self.enable_text = enable; + self + } + + /// Set custom User-Agent + pub fn user_agent(mut self, ua: impl Into) -> Self { + self.user_agent = Some(ua.into()); + self + } + + /// Add URL prefix to allow list + pub fn allow_prefix(mut self, prefix: impl Into) -> Self { + self.allow_prefixes.push(prefix.into()); + self + } + + /// Add URL prefix to block list + pub fn block_prefix(mut self, prefix: impl Into) -> Self { + self.block_prefixes.push(prefix.into()); + self + } + + /// Build the tool + pub fn build(self) -> Tool { + Tool { + enable_markdown: self.enable_markdown, + enable_text: self.enable_text, + user_agent: self.user_agent, + allow_prefixes: self.allow_prefixes, + block_prefixes: self.block_prefixes, + } + } +} + +/// Configured WebFetch tool +#[derive(Debug, Clone)] +pub struct Tool { + enable_markdown: bool, + enable_text: bool, + user_agent: Option, + allow_prefixes: Vec, + block_prefixes: Vec, +} + +impl Default for Tool { + fn default() -> Self { + ToolBuilder::new().build() + } +} + +impl Tool { + /// Create a new tool builder + pub fn builder() -> ToolBuilder { + ToolBuilder::new() + } + + /// Get tool description + pub fn description(&self) -> &'static str { + TOOL_DESCRIPTION + } + + /// Get system prompt (empty for this tool) + pub fn system_prompt(&self) -> &'static str { + "" + } + + /// Get full documentation (llmtxt) + pub fn llmtxt(&self) -> &'static str { + TOOL_LLMTXT + } + + /// Get input schema as JSON + pub fn input_schema(&self) -> serde_json::Value { + let schema = schema_for!(WebFetchRequest); + let mut value = serde_json::to_value(schema).unwrap_or_default(); + + // Remove disabled options from schema + if let Some(props) = value.get_mut("properties").and_then(|p| p.as_object_mut()) { + if 
!self.enable_markdown { + props.remove("as_markdown"); + } + if !self.enable_text { + props.remove("as_text"); + } + } + + value + } + + /// Get output schema as JSON + pub fn output_schema(&self) -> serde_json::Value { + let schema = schema_for!(WebFetchResponse); + serde_json::to_value(schema).unwrap_or_default() + } + + /// Execute the tool with the given request + pub async fn execute(&self, req: WebFetchRequest) -> Result { + let options = FetchOptions { + user_agent: self.user_agent.clone(), + allow_prefixes: self.allow_prefixes.clone(), + block_prefixes: self.block_prefixes.clone(), + enable_markdown: self.enable_markdown, + enable_text: self.enable_text, + }; + + fetch_with_options(req, options).await + } + + /// Execute the tool with status updates + pub async fn execute_with_status( + &self, + req: WebFetchRequest, + mut status_callback: F, + ) -> Result + where + F: FnMut(ToolStatus), + { + status_callback(ToolStatus::new("validate").with_percent(0.0)); + + // Validate request + if req.url.is_empty() { + return Err(FetchError::MissingUrl); + } + + if !req.url.starts_with("http://") && !req.url.starts_with("https://") { + return Err(FetchError::InvalidUrlScheme); + } + + status_callback(ToolStatus::new("connect").with_percent(10.0)); + + let options = FetchOptions { + user_agent: self.user_agent.clone(), + allow_prefixes: self.allow_prefixes.clone(), + block_prefixes: self.block_prefixes.clone(), + enable_markdown: self.enable_markdown, + enable_text: self.enable_text, + }; + + status_callback(ToolStatus::new("fetch").with_percent(20.0)); + + let result = fetch_with_options(req, options).await; + + status_callback(ToolStatus::new("complete").with_percent(100.0)); + + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tool_builder() { + let tool = Tool::builder() + .enable_markdown(false) + .enable_text(true) + .user_agent("TestAgent/1.0") + .allow_prefix("https://allowed.com") + .block_prefix("https://blocked.com") + 
.build(); + + assert!(!tool.enable_markdown); + assert!(tool.enable_text); + assert_eq!(tool.user_agent, Some("TestAgent/1.0".to_string())); + assert_eq!(tool.allow_prefixes, vec!["https://allowed.com"]); + assert_eq!(tool.block_prefixes, vec!["https://blocked.com"]); + } + + #[test] + fn test_tool_description() { + let tool = Tool::default(); + assert!(!tool.description().is_empty()); + assert!(tool.system_prompt().is_empty()); + assert!(!tool.llmtxt().is_empty()); + } + + #[test] + fn test_tool_schemas() { + let tool = Tool::default(); + let input_schema = tool.input_schema(); + let output_schema = tool.output_schema(); + + // Input schema should have url property + assert!(input_schema["properties"]["url"].is_object()); + + // Output schema should have url and status_code + assert!(output_schema["properties"]["url"].is_object()); + assert!(output_schema["properties"]["status_code"].is_object()); + } + + #[test] + fn test_tool_schema_feature_gating() { + let tool = Tool::builder() + .enable_markdown(false) + .enable_text(false) + .build(); + + let schema = tool.input_schema(); + + // Disabled options should be removed from schema + if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) { + assert!(!props.contains_key("as_markdown")); + assert!(!props.contains_key("as_text")); + } + } + + #[test] + fn test_tool_status() { + let status = ToolStatus::new("fetch") + .with_message("Fetching URL") + .with_percent(50.0) + .with_eta(5000); + + assert_eq!(status.phase, "fetch"); + assert_eq!(status.message, Some("Fetching URL".to_string())); + assert_eq!(status.percent_complete, Some(50.0)); + assert_eq!(status.eta_ms, Some(5000)); + } +} diff --git a/crates/webfetch/src/types.rs b/crates/webfetch/src/types.rs new file mode 100644 index 0000000..a735f78 --- /dev/null +++ b/crates/webfetch/src/types.rs @@ -0,0 +1,209 @@ +//! 
Core types for WebFetch + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::str::FromStr; + +/// HTTP method for the request +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "UPPERCASE")] +pub enum HttpMethod { + /// HTTP GET request + #[default] + Get, + /// HTTP HEAD request + Head, +} + +impl FromStr for HttpMethod { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_uppercase().as_str() { + "GET" => Ok(HttpMethod::Get), + "HEAD" => Ok(HttpMethod::Head), + _ => Err("Invalid method: must be GET or HEAD".to_string()), + } + } +} + +impl std::fmt::Display for HttpMethod { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HttpMethod::Get => write!(f, "GET"), + HttpMethod::Head => write!(f, "HEAD"), + } + } +} + +/// Request to fetch a URL +#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] +pub struct WebFetchRequest { + /// The URL to fetch (required, must be http:// or https://) + pub url: String, + + /// HTTP method (optional, default GET) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub method: Option, + + /// Convert HTML to markdown (optional) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub as_markdown: Option, + + /// Convert HTML to plain text (optional) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub as_text: Option, +} + +impl WebFetchRequest { + /// Create a new request with the given URL + pub fn new(url: impl Into) -> Self { + Self { + url: url.into(), + ..Default::default() + } + } + + /// Set the HTTP method + pub fn method(mut self, method: HttpMethod) -> Self { + self.method = Some(method); + self + } + + /// Enable markdown conversion + pub fn as_markdown(mut self) -> Self { + self.as_markdown = Some(true); + self + } + + /// Enable text conversion + pub fn as_text(mut self) -> Self { + self.as_text = Some(true); + self + } + + /// 
Get the effective method (default to GET) + pub fn effective_method(&self) -> HttpMethod { + self.method.unwrap_or_default() + } + + /// Check if markdown conversion is requested + pub fn wants_markdown(&self) -> bool { + self.as_markdown.unwrap_or(false) + } + + /// Check if text conversion is requested + pub fn wants_text(&self) -> bool { + self.as_text.unwrap_or(false) + } +} + +/// Response from a fetch operation +#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] +pub struct WebFetchResponse { + /// The fetched URL + pub url: String, + + /// HTTP status code + pub status_code: u16, + + /// Content-Type header value + #[serde(skip_serializing_if = "Option::is_none")] + pub content_type: Option, + + /// Content size in bytes + #[serde(skip_serializing_if = "Option::is_none")] + pub size: Option, + + /// Last-Modified header value + #[serde(skip_serializing_if = "Option::is_none")] + pub last_modified: Option, + + /// Extracted filename + #[serde(skip_serializing_if = "Option::is_none")] + pub filename: Option, + + /// Content format: "markdown", "text", or "raw" + #[serde(skip_serializing_if = "Option::is_none")] + pub format: Option, + + /// The fetched/converted content + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option, + + /// True if content was truncated due to timeout + #[serde(skip_serializing_if = "Option::is_none")] + pub truncated: Option, + + /// "HEAD" for HEAD requests + #[serde(skip_serializing_if = "Option::is_none")] + pub method: Option, + + /// Error message (for binary content) + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_http_method_from_str() { + assert_eq!(HttpMethod::from_str("GET").unwrap(), HttpMethod::Get); + assert_eq!(HttpMethod::from_str("get").unwrap(), HttpMethod::Get); + assert_eq!(HttpMethod::from_str("Get").unwrap(), HttpMethod::Get); + assert_eq!(HttpMethod::from_str("HEAD").unwrap(), 
HttpMethod::Head); + assert_eq!(HttpMethod::from_str("head").unwrap(), HttpMethod::Head); + assert!(HttpMethod::from_str("POST").is_err()); + assert!(HttpMethod::from_str("invalid").is_err()); + } + + #[test] + fn test_http_method_display() { + assert_eq!(HttpMethod::Get.to_string(), "GET"); + assert_eq!(HttpMethod::Head.to_string(), "HEAD"); + } + + #[test] + fn test_request_builder() { + let req = WebFetchRequest::new("https://example.com") + .method(HttpMethod::Head) + .as_markdown(); + + assert_eq!(req.url, "https://example.com"); + assert_eq!(req.method, Some(HttpMethod::Head)); + assert_eq!(req.as_markdown, Some(true)); + } + + #[test] + fn test_request_effective_method() { + let req = WebFetchRequest::new("https://example.com"); + assert_eq!(req.effective_method(), HttpMethod::Get); + + let req = req.method(HttpMethod::Head); + assert_eq!(req.effective_method(), HttpMethod::Head); + } + + #[test] + fn test_request_serialization() { + let req = WebFetchRequest::new("https://example.com").as_markdown(); + let json = serde_json::to_string(&req).unwrap(); + assert!(json.contains("\"url\":\"https://example.com\"")); + assert!(json.contains("\"as_markdown\":true")); + } + + #[test] + fn test_response_serialization() { + let resp = WebFetchResponse { + url: "https://example.com".to_string(), + status_code: 200, + content: Some("Hello".to_string()), + ..Default::default() + }; + let json = serde_json::to_string(&resp).unwrap(); + // Optional None fields should be omitted + assert!(!json.contains("content_type")); + assert!(json.contains("\"content\":\"Hello\"")); + } +} diff --git a/crates/webfetch/tests/integration.rs b/crates/webfetch/tests/integration.rs new file mode 100644 index 0000000..e4da485 --- /dev/null +++ b/crates/webfetch/tests/integration.rs @@ -0,0 +1,445 @@ +//! 
Integration tests for WebFetch using wiremock + +use webfetch::{fetch, HttpMethod, Tool, WebFetchRequest}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +#[tokio::test] +async fn test_simple_get() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string("Hello, World!") + .insert_header("content-type", "text/plain"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + assert_eq!(resp.status_code, 200); + assert_eq!(resp.content_type, Some("text/plain".to_string())); + assert!(resp.content.unwrap().contains("Hello, World!")); + assert_eq!(resp.format, Some("raw".to_string())); +} + +#[tokio::test] +async fn test_head_request() { + let mock_server = MockServer::start().await; + + Mock::given(method("HEAD")) + .and(path("/file.pdf")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/pdf") + .insert_header("content-length", "12345") + .insert_header("last-modified", "Tue, 01 Jan 2024 00:00:00 GMT"), + ) + .mount(&mock_server) + .await; + + let req = + WebFetchRequest::new(format!("{}/file.pdf", mock_server.uri())).method(HttpMethod::Head); + let resp = fetch(req).await.unwrap(); + + assert_eq!(resp.status_code, 200); + assert_eq!(resp.method, Some("HEAD".to_string())); + assert_eq!(resp.content_type, Some("application/pdf".to_string())); + assert_eq!(resp.size, Some(12345)); + assert_eq!( + resp.last_modified, + Some("Tue, 01 Jan 2024 00:00:00 GMT".to_string()) + ); + assert!(resp.content.is_none()); +} + +#[tokio::test] +async fn test_html_to_markdown() { + let mock_server = MockServer::start().await; + + let html = r#" + +Test + +

Hello World

+

This is a test paragraph.

+
    +
  • Item 1
  • +
  • Item 2
  • +
+ +"#; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_raw(html, "text/html")) + .mount(&mock_server) + .await; + + let tool = Tool::default(); + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())).as_markdown(); + let resp = tool.execute(req).await.unwrap(); + + assert_eq!(resp.status_code, 200); + assert_eq!(resp.format, Some("markdown".to_string())); + + let content = resp.content.unwrap(); + assert!(content.contains("# Hello World")); + assert!(content.contains("**test**")); + assert!(content.contains("- Item 1")); + assert!(content.contains("- Item 2")); +} + +#[tokio::test] +async fn test_html_to_text() { + let mock_server = MockServer::start().await; + + let html = r#" + + +

Title

+

Paragraph text.

+ + +"#; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_raw(html, "text/html")) + .mount(&mock_server) + .await; + + let tool = Tool::default(); + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())).as_text(); + let resp = tool.execute(req).await.unwrap(); + + assert_eq!(resp.status_code, 200); + assert_eq!(resp.format, Some("text".to_string())); + + let content = resp.content.unwrap(); + assert!(content.contains("Title")); + assert!(content.contains("Paragraph text")); + assert!(!content.contains("alert")); // Script should be stripped +} + +#[tokio::test] +async fn test_binary_content() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/image.png")) + .respond_with( + ResponseTemplate::new(200) + .set_body_bytes(vec![0x89, 0x50, 0x4E, 0x47]) // PNG magic bytes + .insert_header("content-type", "image/png") + .insert_header("content-length", "4"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/image.png", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + assert_eq!(resp.status_code, 200); + assert_eq!(resp.content_type, Some("image/png".to_string())); + assert_eq!(resp.size, Some(4)); + assert!(resp.content.is_none()); + assert!(resp.error.is_some()); + assert!(resp.error.unwrap().contains("Binary content")); +} + +#[tokio::test] +async fn test_4xx_status() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/not-found")) + .respond_with( + ResponseTemplate::new(404) + .set_body_string("Not Found") + .insert_header("content-type", "text/plain"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/not-found", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + // 4xx is still a success response (not a tool error) + assert_eq!(resp.status_code, 404); + assert!(resp.content.unwrap().contains("Not Found")); +} + 
+#[tokio::test] +async fn test_5xx_status() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/error")) + .respond_with( + ResponseTemplate::new(500) + .set_body_string("Internal Server Error") + .insert_header("content-type", "text/plain"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/error", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + // 5xx is still a success response (not a tool error) + assert_eq!(resp.status_code, 500); + assert!(resp.content.unwrap().contains("Internal Server Error")); +} + +#[tokio::test] +async fn test_content_disposition_filename() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/download")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string("file content") + .insert_header("content-type", "text/plain") + .insert_header("content-disposition", "attachment; filename=\"report.txt\""), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/download", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + assert_eq!(resp.filename, Some("report.txt".to_string())); +} + +#[tokio::test] +async fn test_filename_from_url() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/path/to/document.pdf")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/pdf") + .insert_header("content-length", "100"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/path/to/document.pdf", mock_server.uri())) + .method(HttpMethod::Head); + let resp = fetch(req).await.unwrap(); + + assert_eq!(resp.filename, Some("document.pdf".to_string())); +} + +#[tokio::test] +async fn test_size_for_text_content() { + let mock_server = MockServer::start().await; + + let body = "Hello, this is test content!"; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( 
+ ResponseTemplate::new(200) + .set_body_string(body) + .insert_header("content-type", "text/plain"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + // Size should equal bytes read from body + assert_eq!(resp.size, Some(body.len() as u64)); +} + +#[tokio::test] +async fn test_url_prefix_allow_list() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("OK")) + .mount(&mock_server) + .await; + + // Create tool with allow list that doesn't include the mock server + let tool = Tool::builder() + .allow_prefix("https://allowed.example.com") + .build(); + + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())); + let result = tool.execute(req).await; + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("prefix not allowed")); +} + +#[tokio::test] +async fn test_url_prefix_block_list() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("OK")) + .mount(&mock_server) + .await; + + // Create tool with block list that includes localhost + let tool = Tool::builder().block_prefix("http://127.0.0.1").build(); + + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())); + let result = tool.execute(req).await; + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("prefix not allowed")); +} + +#[tokio::test] +async fn test_invalid_url_scheme() { + let req = WebFetchRequest::new("ftp://example.com/file.txt"); + let result = fetch(req).await; + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("http:// or https://")); +} + +#[tokio::test] +async fn test_missing_url() { + let req = WebFetchRequest::new(""); + let result = fetch(req).await; + + 
assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Missing")); +} + +#[tokio::test] +async fn test_entity_decoding_in_html() { + let mock_server = MockServer::start().await; + + let html = "

Tom & Jerry <3 > others "quoted"

"; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_raw(html, "text/html")) + .mount(&mock_server) + .await; + + let tool = Tool::default(); + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())).as_text(); + let resp = tool.execute(req).await.unwrap(); + + let content = resp.content.unwrap(); + assert!(content.contains("Tom & Jerry")); + assert!(content.contains("<3")); + assert!(content.contains("> others")); + assert!(content.contains("\"quoted\"")); +} + +#[tokio::test] +async fn test_non_html_with_conversion_flags() { + let mock_server = MockServer::start().await; + + let json = r#"{"key": "value"}"#; + + Mock::given(method("GET")) + .and(path("/api/data")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string(json) + .insert_header("content-type", "application/json"), + ) + .mount(&mock_server) + .await; + + let tool = Tool::default(); + let req = WebFetchRequest::new(format!("{}/api/data", mock_server.uri())).as_markdown(); + let resp = tool.execute(req).await.unwrap(); + + // Non-HTML should return raw even with as_markdown flag + assert_eq!(resp.format, Some("raw".to_string())); + assert!(resp.content.unwrap().contains("\"key\"")); +} + +#[tokio::test] +async fn test_html_detection_by_body() { + let mock_server = MockServer::start().await; + + // Server returns HTML without proper content-type + let html = "Hello"; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string(html) + .insert_header("content-type", "text/plain"), // Wrong content-type + ) + .mount(&mock_server) + .await; + + let tool = Tool::default(); + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())).as_markdown(); + let resp = tool.execute(req).await.unwrap(); + + // Should detect HTML by body content and convert + assert_eq!(resp.format, Some("markdown".to_string())); +} + +#[tokio::test] +async fn test_custom_user_agent() { + let mock_server = 
MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("OK")) + .mount(&mock_server) + .await; + + let tool = Tool::builder().user_agent("CustomBot/1.0").build(); + + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())); + let resp = tool.execute(req).await.unwrap(); + + assert_eq!(resp.status_code, 200); +} + +#[tokio::test] +async fn test_excessive_newlines_filtered() { + let mock_server = MockServer::start().await; + + let body = "Line1\n\n\n\n\n\nLine2"; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string(body) + .insert_header("content-type", "text/plain"), + ) + .mount(&mock_server) + .await; + + let req = WebFetchRequest::new(format!("{}/", mock_server.uri())); + let resp = fetch(req).await.unwrap(); + + // Should have at most 2 consecutive newlines + assert!(!resp.content.unwrap().contains("\n\n\n")); +}