diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d1c4fde..a6bbefd 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,12 +16,16 @@ jobs: - uses: actions/checkout@v5 - name: Build run: cargo build --verbose + - name: Build with serde + run: cargo build --features serde --verbose + - name: Build with miniserde + run: cargo build --features miniserde --verbose + - name: Build with borsh + run: cargo build --features borsh --verbose - name: Run tests run: cargo test --verbose - name: Run miniserde tests run: cargo test --features miniserde --verbose - - name: Run nanoserde tests - run: cargo test --features nanoserde --verbose - name: Run borsh tests run: cargo test --features borsh --verbose @@ -54,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.82.0, 1.83.0] + rust: [1.85.0, 1.86.0] timeout-minutes: 45 steps: - uses: actions/checkout@v5 @@ -62,11 +66,17 @@ jobs: with: toolchain: ${{matrix.rust}} - run: cargo build + - run: cargo build --features serde + - run: cargo build --features serde --no-default-features + - run: cargo build --features miniserde + - run: cargo build --features borsh + - run: cargo build --features borsh,serde + - run: cargo build --features borsh,serde,miniserde + - run: cargo build --features borsh,serde,miniserde --no-default-features - run: cargo test - run: cargo test --features serde - - run: cargo test --features nanoserde - - run: cargo test --features miniserde - - run: cargo test --features borsh + - run: cargo test --features borsh,serde,miniserde + - run: cargo test --features borsh,serde,miniserde --no-default-features clippy: runs-on: ubuntu-latest @@ -79,7 +89,6 @@ jobs: - run: cargo clippy --workspace --tests --examples - run: cargo clippy --workspace --tests --examples --features serde - run: cargo clippy --workspace --tests --examples --features miniserde - - run: cargo clippy --workspace --tests --examples --features nanoserde - run: cargo clippy --workspace 
--tests --examples --features borsh docs: diff --git a/.vscode/settings.json b/.vscode/settings.json index 93704c9..d8146ae 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "rust-analyzer.cargo.features": ["serde", "nanoserde"] + "rust-analyzer.check.command": "clippy", + "rust-analyzer.cargo.features": ["serde", "borsh", "miniserde"] } diff --git a/Cargo.toml b/Cargo.toml index 6ee3fd0..5d55636 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,34 +1,24 @@ -[package] -name = "bit-vec" -version = "0.9.1" -authors = ["Alexis Beingessner "] -license = "Apache-2.0 OR MIT" -description = "A vector of bits" -repository = "https://github.com/contain-rs/bit-vec" -homepage = "https://github.com/contain-rs/bit-vec" -documentation = "https://docs.rs/bit-vec/" -keywords = ["data-structures", "bitvec", "bitmask", "bitmap", "bit"] -readme = "README.md" -edition = "2021" -rust-version = "1.82" +[workspace] -[dependencies] -borsh = { version = "1.6.0", default-features = false, features = ["derive"], optional = true } -serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } -miniserde = { version = "0.1", optional = true } -nanoserde = { version = "0.2", optional = true } +members = [ + "vec", + "set", + "matrix", +] -[dev-dependencies] -serde_json = "1.0" -rand = "0.9" -rand_xorshift = "0.4" +exclude = [ + "fuzz", +] -[features] -default = ["std"] -serde_std = ["std", "serde/std"] -serde_no_std = [] -borsh_std = ["borsh/std"] -std = ["serde?/std"] +resolver = "2" -[package.metadata.docs.rs] -features = ["borsh", "serde", "miniserde", "nanoserde"] +# add debug info for profiling? +[profile.release] +debug = false + +[workspace.dependencies] + +[workspace.package] +# shared version of all public crates in the workspace +version = "0.10.0" +rust-version = "1.85.0" diff --git a/README.md b/README.md deleted file mode 100644 index 84780dc..0000000 --- a/README.md +++ /dev/null @@ -1,137 +0,0 @@ -
-

bit-vec

-

- A compact vector of bits. -

-

- -[![crates.io][crates.io shield]][crates.io link] -[![Documentation][docs.rs badge]][docs.rs link] -![Rust CI][github ci badge] -![MSRV][rustc 1.82+] -
-
-[![Dependency Status][deps.rs status]][deps.rs link] -[![Download Status][shields.io download count]][crates.io link] - -

-
- -[crates.io shield]: https://img.shields.io/crates/v/bit-vec?label=latest -[crates.io link]: https://crates.io/crates/bit-vec -[docs.rs badge]: https://docs.rs/bit-vec/badge.svg?version=0.9.1 -[docs.rs link]: https://docs.rs/bit-vec/0.9.1/bit_vec/ -[github ci badge]: https://github.com/contain-rs/bit-vec/actions/workflows/rust.yml/badge.svg -[rustc 1.82+]: https://img.shields.io/badge/rustc-1.82%2B-blue.svg -[deps.rs status]: https://deps.rs/crate/bit-vec/0.9.1/status.svg -[deps.rs link]: https://deps.rs/crate/bit-vec/0.9.1 -[shields.io download count]: https://img.shields.io/crates/d/bit-vec.svg - -## Usage - -Add this to your Cargo.toml: - -```toml -[dependencies] -bit-vec = "0.9" -``` - -If you want [serde](https://github.com/serde-rs/serde) support, include the feature like this: - -```toml -[dependencies] -bit-vec = { version = "0.9", features = ["serde"] } -``` - -If you want to use bit-vec in a program that has `#![no_std]`, just drop default features: - -```toml -[dependencies] -bit-vec = { version = "0.9", default-features = false } -``` - -If you want to use serde with the alloc crate instead of std, just use the `serde_no_std` feature: - -```toml -[dependencies] -bit-vec = { version = "0.9", default-features = false, features = ["serde", "serde_no_std"] } -``` - -If you want [borsh-rs](https://github.com/near/borsh-rs) support, include it like this: - -```toml -[dependencies] -bit-vec = { version = "0.9", features = ["borsh"] } -``` - -Other available serialization libraries can be enabled with the -[`miniserde`](https://github.com/dtolnay/miniserde) and -[`nanoserde`](https://github.com/not-fl3/nanoserde) features. - - - -### Description - -Dynamic collections implemented with compact bit vectors. - -### Examples - -This is a simple example of the [Sieve of Eratosthenes][sieve] -which calculates prime numbers up to a given limit. 
- -[sieve]: http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes - -```rust -use bit_vec::BitVec; - -let max_prime = 10000; - -// Store the primes as a BitVec -let primes = { - // Assume all numbers are prime to begin, and then we - // cross off non-primes progressively - let mut bv = BitVec::from_elem(max_prime, true); - - // Neither 0 nor 1 are prime - bv.set(0, false); - bv.set(1, false); - - for i in 2.. 1 + (max_prime as f64).sqrt() as usize { - // if i is a prime - if bv[i] { - // Mark all multiples of i as non-prime (any multiples below i * i - // will have been marked as non-prime previously) - for j in i.. { - if i * j >= max_prime { - break; - } - bv.set(i * j, false) - } - } - } - bv -}; - -// Simple primality tests below our max bound -let print_primes = 20; -print!("The primes below {} are: ", print_primes); -for x in 0..print_primes { - if primes.get(x).unwrap_or(false) { - print!("{} ", x); - } -} -println!(); - -let num_primes = primes.iter().filter(|x| *x).count(); -println!("There are {} primes below {}", num_primes, max_prime); -assert_eq!(num_primes, 1_229); -``` - - - -## License - -Dual-licensed for compatibility with the Rust project. - -Licensed under the Apache License Version 2.0: http://www.apache.org/licenses/LICENSE-2.0, -or the MIT license: http://opensource.org/licenses/MIT, at your option. 
diff --git a/README.md b/README.md new file mode 120000 index 0000000..e169d09 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +vec/README.md \ No newline at end of file diff --git a/RELEASES.md b/RELEASES.md index 33b230d..9ce7390 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -3,6 +3,8 @@ Version 0.10.0 (TO BE RELEASED) +- removed nanoserde support + Version 0.9.1 ========================== diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..8505faf --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,2 @@ +/out + diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..d82c684 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "bit-fuzz" +version = "0.1.0" +authors = ["Dawid Ciężarkiewicz ", "Peter Blackson "] +edition = "2021" +publish = false + +[package.metadata] +cargo-fuzz = true + +[features] +default = ["afl"] +afl_fuzz = ["afl"] +honggfuzz_fuzz = ["honggfuzz"] + +[dependencies] +honggfuzz = { version = "0.5", optional = true } +afl = { version = "0.17", optional = true } +bit-vec = { path = "../vec/" } +bit-set = { path = "../set/" } +bit-matrix = { path = "../matrix/" } +# smallvec = "1.15" + +[[bin]] +name = "bit_ops" +path = "fuzz_targets/bit_ops.rs" diff --git a/fuzz/LICENSE-APACHE b/fuzz/LICENSE-APACHE new file mode 120000 index 0000000..965b606 --- /dev/null +++ b/fuzz/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/fuzz/LICENSE-MIT b/fuzz/LICENSE-MIT new file mode 120000 index 0000000..76219eb --- /dev/null +++ b/fuzz/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000..a31515a --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,9 @@ +# fuzzer for bit-vec, bit-set and bit-matrix + +Based on fuzzing in `smallvec`. 
+ +# fuzzing + +```sh +cargo afl build --release --bin bit_ops --features afl && cargo afl fuzz -i in -o out target/release/bit_ops +``` diff --git a/fuzz/fuzz_targets/bit_ops.rs b/fuzz/fuzz_targets/bit_ops.rs new file mode 100644 index 0000000..1ea5f5a --- /dev/null +++ b/fuzz/fuzz_targets/bit_ops.rs @@ -0,0 +1,273 @@ +//! Simple fuzzer testing all available `BitVec`, `BitSet` and `BitMatrix` operations + +use bit_vec::{BitBlock, BitVec}; +use bit_set::BitSet; +// use smallvec::SmallVec; + +// There's no point growing too much, so try not to grow +// over this size. +const CAP_GROWTH: usize = 256; + +macro_rules! next_usize { + ($b:ident) => { + $b.next().unwrap_or(0) as usize + }; +} + +macro_rules! next_u8 { + ($b:ident) => { + $b.next().unwrap_or(0) + }; +} + +fn black_box_bit_vec(s: &BitVec) { + // print to work as a black_box + print!("{}", s); +} + +fn black_box_bit_set(s: &BitSet) { + // print to work as a black_box + print!("{}", s); +} + +fn do_test(data: &[u8]) -> BitVec { + let mut v = BitVec::::new_general(); + + let mut bytes = data.iter().copied(); + + while let Some(op) = bytes.next() { + match op % 23 { + 0 => { + v = BitVec::new_general(); + } + 1 => { + v = BitVec::with_capacity_general(next_usize!(bytes)); + } + 2 => { + v = BitVec::from_bytes_general(&v.to_bytes()[..]); + } + 3 => {} + 4 => { + if v.len() < CAP_GROWTH { + v.push(next_u8!(bytes) < 128) + } + } + 5 => { + v.pop(); + } + 6 => v.grow(next_usize!(bytes) + v.len(), next_u8!(bytes) < 128), + 7 => { + if v.len() < CAP_GROWTH { + v.reserve(next_usize!(bytes)) + } + } + 8 => { + if v.len() < CAP_GROWTH { + v.reserve_exact(next_usize!(bytes)) + } + } + 9 => v.shrink_to_fit(), + 10 => v.truncate(next_usize!(bytes)), + 11 => black_box_bit_vec(&v), + 12 => { + if !v.is_empty() { + v.remove(next_usize!(bytes) % v.len()); + } + } + 13 => { + v.fill(false); + } + 14 => { + if !v.is_empty() { + v.remove(next_usize!(bytes) % v.len()); + } + } + 15 => { + let insert_pos = next_usize!(bytes) % 
(v.len() + 1); + v.insert(insert_pos, next_u8!(bytes) < 128); + } + + 16 => { + v = BitVec::from_bytes_general(&v.to_bytes()[..]); + } + + 17 => { + v = BitVec::from_bytes_general(data); + } + + 18 => { + if v.len() < CAP_GROWTH { + let mut v2 = BitVec::::from_bytes_general(data); + v.append(&mut v2); + } + } + + 19 => { + if v.len() < CAP_GROWTH { + v.reserve(next_usize!(bytes)); + } + } + + 20 => { + if v.len() < CAP_GROWTH { + v.reserve_exact(next_usize!(bytes)); + } + } + 21 => { + let slice = vec![next_u8!(bytes); next_usize!(bytes)]; + v = BitVec::::from_bytes_general(&slice[..]); + } + 22 => { + v.fill(true); + } + _ => panic!("booo"), + } + } + v +} + +fn do_test_set(data: &[u8]) -> BitSet { + let mut v = BitSet::::new_general(); + + let mut bytes = data.iter().copied(); + + while let Some(op) = bytes.next() { + match op % 16 { + 0 => { + v = BitSet::new_general(); + } + 1 => { + v = BitSet::with_capacity_general(next_usize!(bytes)); + } + 2 => { + v = BitSet::from_bytes_general(&v.get_ref().to_bytes()[..]); + } + 3 => { + if v.get_ref().len() < CAP_GROWTH { + v.reserve_len(next_usize!(bytes)) + } + } + 4 => { + if v.get_ref().len() < CAP_GROWTH { + v.reserve_len_exact(next_usize!(bytes)) + } + } + 5 => v.shrink_to_fit(), + 6 => v.truncate(next_usize!(bytes)), + 7 => black_box_bit_set(&v), + 8 => { + if !v.is_empty() { + v.remove(next_usize!(bytes) % v.get_ref().len()); + } + } + 9 => { + v.reset(); + } + 10 => { + let insert_pos = next_usize!(bytes) % (v.get_ref().len() + 1); + v.insert(insert_pos); + } + + 11 => { + v = BitSet::from_bytes_general(&v.get_ref().to_bytes()[..]); + } + + 12 => { + v = BitSet::from_bytes_general(data); + } + + 13 => { + if v.get_ref().len() < CAP_GROWTH { + v.reserve_len(next_usize!(bytes)); + } + } + + 14 => { + if v.get_ref().len() < CAP_GROWTH { + v.reserve_len_exact(next_usize!(bytes)); + } + } + 15 => { + let slice = vec![next_u8!(bytes); next_usize!(bytes)]; + v = BitSet::::from_bytes_general(&slice[..]); + } + _ => 
panic!("booo"), + } + } + v +} + +fn do_test_all(data: &[u8]) { + do_test::(data); + do_test::(data); + do_test::(data); + do_test::(data); + // do_test::>(data); + // do_test::>(data); + + do_test_set::(data); + do_test_set::(data); + do_test_set::(data); + do_test_set::(data); + // do_test_set::>(data); + // do_test_set::>(data); +} + +#[cfg(all(feature = "afl", not(feature = "honggfuzz")))] // `afl` is a default feature; step aside when `honggfuzz` is enabled so only one `main` is compiled +fn main() { + afl::fuzz!(|data| { + // Remove the panic hook so we can actually catch panic + // See https://github.com/rust-fuzz/afl.rs/issues/150 + std::panic::set_hook(Box::new(|_| {})); + do_test_all(data); + }); +} + +#[cfg(feature = "honggfuzz")] +fn main() { + loop { + honggfuzz::fuzz!(|data| { + // Remove the panic hook so we can actually catch panic + // See https://github.com/rust-fuzz/afl.rs/issues/150 + std::panic::set_hook(Box::new(|_| {})); + do_test_all(data); + }); + } +} + +#[cfg(test)] +mod tests { + fn extend_vec_from_hex(hex: &str, out: &mut Vec) { + let mut b = 0; + for (idx, c) in hex.as_bytes().iter().enumerate() { + b <<= 4; + match *c { + b'A'..=b'F' => b |= c - b'A' + 10, + b'a'..=b'f' => b |= c - b'a' + 10, + b'0'..=b'9' => b |= c - b'0', + b'\n' => {} + b' ' => {} + _ => panic!("Bad hex"), + } + if (idx & 1) == 1 { + out.push(b); + b = 0; + } + } + } + + #[test] + fn duplicate_crash() { + let mut a = Vec::new(); + // paste the output of `xxd -p ` here and run `cargo test` + extend_vec_from_hex( + r#" + 787c4a1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d4a1d1d1d1d1d1d1d + 1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d27271d1d1d1d1d1d2727fffe + 270a610a + "#, + &mut a, + ); + super::do_test_all(&a); + } +} diff --git a/fuzz/in/stub b/fuzz/in/stub new file mode 100644 index 0000000..587be6b --- /dev/null +++ b/fuzz/in/stub @@ -0,0 +1 @@ +x diff --git a/matrix/Cargo.toml b/matrix/Cargo.toml new file mode 100644 index 0000000..4d974c5 --- /dev/null +++ b/matrix/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "bit-matrix" +version.workspace = true +rust-version.workspace = true +authors = [ "Piotr 
Czarnecki " ] +description = "Library for bit matrices and vectors." +keywords = ["container", "bit", "bitfield", "algebra"] +documentation = "https://docs.rs/bit-matrix/latest/bit_matrix/" +repository = "https://github.com/pczarn/bit-matrix" +license = "MIT OR Apache-2.0" +edition = "2021" + +[lib] +name = "bit_matrix" + +[dependencies] +serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } +miniserde = { version = "0.1", optional = true } +borsh = { version = "1.6.0", default-features = false, optional = true } # no default features so the crate stays usable without std; "std" below re-enables borsh/std +bit-vec = { path = "../vec/", default-features = false } + +[dev-dependencies] +serde_json = "1.0" + + +[features] +default = ["std"] +std = ["bit-vec/std", "serde?/std", "borsh?/std"] + +serde = ["dep:serde", "bit-vec/serde"] +miniserde = ["dep:miniserde", "bit-vec/miniserde"] +borsh = ["dep:borsh", "bit-vec/borsh"] diff --git a/matrix/LICENSE-APACHE b/matrix/LICENSE-APACHE new file mode 120000 index 0000000..965b606 --- /dev/null +++ b/matrix/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/matrix/LICENSE-MIT b/matrix/LICENSE-MIT new file mode 120000 index 0000000..76219eb --- /dev/null +++ b/matrix/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/matrix/README.md b/matrix/README.md new file mode 100644 index 0000000..1240190 --- /dev/null +++ b/matrix/README.md @@ -0,0 +1,105 @@ +
+

bit-matrix

+

+ A compact matrix of bits. +

+

+ +[![crates.io][crates.io shield]][crates.io link] +[![Documentation][docs.rs badge]][docs.rs link] +![Rust CI][github ci badge] +![MSRV][rustc 1.82+] +
+
+[![Dependency Status][deps.rs status]][deps.rs link] +[![Download Status][shields.io download count]][crates.io link] + +

+
+ +[crates.io shield]: https://img.shields.io/crates/v/bit-matrix?label=latest +[crates.io link]: https://crates.io/crates/bit-matrix +[docs.rs badge]: https://docs.rs/bit-matrix/badge.svg?version=0.8.1 +[docs.rs link]: https://docs.rs/bit-matrix/0.8.1/bit_matrix/ +[github ci badge]: https://github.com/pczarn/bit-matrix/workflows/CI/badge.svg?branch=master +[rustc 1.82+]: https://img.shields.io/badge/rustc-1.85%2B-blue.svg +[deps.rs status]: https://deps.rs/crate/bit-matrix/0.8.1/status.svg +[deps.rs link]: https://deps.rs/crate/bit-matrix/0.8.1 +[shields.io download count]: https://img.shields.io/crates/d/bit-matrix.svg + +Rust library that implements bit matrices. +[You can check the documentation here](https://docs.rs/bit-matrix/latest/bit_matrix/). + +Built on top of [contain-rs/bit-vec](https://github.com/contain-rs/bit-vec/). + + + +Implements bit matrices. + +# Examples + +Gets a mutable reference to the square bit matrix within this +rectangular matrix, then performs a transitive closure. + +```rust +use bit_matrix::BitMatrix; + +let mut matrix = ::new(7, 5); +matrix.set(1, 2, true); +matrix.set(2, 3, true); +matrix.set(3, 4, true); + +{ + let mut sub_matrix = matrix.sub_matrix_mut(1 .. 6); + sub_matrix.transitive_closure(); +} +assert!(matrix[(1, 4)]); + +matrix.reflexive_closure(); +assert!(matrix[(0, 0)]); +assert!(matrix[(1, 1)]); +assert!(matrix[(2, 2)]); +assert!(matrix[(3, 3)]); +``` + +This simple example calculates the transitive closure of 4x4 bit matrix. 
+ +```rust +use bit_matrix::BitMatrix; + +let mut matrix = ::new(4, 4); +let points = &[ + (0, 0), + (0, 1), + (0, 3), + (1, 0), + (1, 2), + (2, 0), + (2, 1), + (3, 1), + (3, 3), +]; +for &(i, j) in points { + matrix.set(i, j, true); +} +matrix.transitive_closure(); + +let mut expected_matrix = BitMatrix::new(4, 4); +for i in 0..4 { + for j in 0..4 { + expected_matrix.set(i, j, true); + } +} + +assert_eq!(matrix, expected_matrix); +``` + + + +## License + +Dual-licensed for compatibility with the Rust project. + +Licensed under the Apache License Version 2.0: +http://www.apache.org/licenses/LICENSE-2.0, or the MIT license: +http://opensource.org/licenses/MIT, at your option. diff --git a/matrix/docs/README.md b/matrix/docs/README.md new file mode 100644 index 0000000..ae701e8 --- /dev/null +++ b/matrix/docs/README.md @@ -0,0 +1,4 @@ +## ch08-2.pdf + +A file from Winona State University about the transitive closure +algorithm on pages 4-5. diff --git a/matrix/docs/ch08-2.pdf b/matrix/docs/ch08-2.pdf new file mode 100644 index 0000000..4047169 Binary files /dev/null and b/matrix/docs/ch08-2.pdf differ diff --git a/matrix/src/lib.rs b/matrix/src/lib.rs new file mode 100644 index 0000000..d215c4a --- /dev/null +++ b/matrix/src/lib.rs @@ -0,0 +1,91 @@ +//! Implements bit matrices. +//! +//! # Examples +//! +//! Gets a mutable reference to the square bit matrix within this +//! rectangular matrix, then performs a transitive closure. +//! +//! ```rust +//! use bit_matrix::BitMatrix; +//! +//! let mut matrix = ::new(7, 5); +//! matrix.set(1, 2, true); +//! matrix.set(2, 3, true); +//! matrix.set(3, 4, true); +//! +//! { +//! let mut sub_matrix = matrix.sub_matrix_mut(1 .. 6); +//! sub_matrix.transitive_closure(); +//! } +//! assert!(matrix[(1, 4)]); +//! +//! matrix.reflexive_closure(); +//! assert!(matrix[(0, 0)]); +//! assert!(matrix[(1, 1)]); +//! assert!(matrix[(2, 2)]); +//! assert!(matrix[(3, 3)]); +//! ``` +//! +//! 
This simple example calculates the transitive closure of 4x4 bit matrix. +//! +//! ```rust +//! use bit_matrix::BitMatrix; +//! +//! let mut matrix = ::new(4, 4); +//! let points = &[ +//! (0, 0), +//! (0, 1), +//! (0, 3), +//! (1, 0), +//! (1, 2), +//! (2, 0), +//! (2, 1), +//! (3, 1), +//! (3, 3), +//! ]; +//! for &(i, j) in points { +//! matrix.set(i, j, true); +//! } +//! matrix.transitive_closure(); +//! +//! let mut expected_matrix = BitMatrix::new(4, 4); +//! for i in 0..4 { +//! for j in 0..4 { +//! expected_matrix.set(i, j, true); +//! } +//! } +//! +//! assert_eq!(matrix, expected_matrix); +//! ``` + +#![deny( + missing_docs, + missing_copy_implementations, + trivial_casts, + trivial_numeric_casts, + unused_import_braces, + unused_qualifications +)] +#![cfg_attr(test, deny(warnings))] +#![no_std] +#![deny(clippy::shadow_reuse)] +#![deny(clippy::shadow_same)] +#![deny(clippy::shadow_unrelated)] +#![warn(clippy::multiple_inherent_impl)] +#![warn(clippy::multiple_crate_versions)] +#![warn(clippy::single_match)] +#![warn(clippy::missing_safety_doc)] + +mod matrix; +mod row; +mod submatrix; +mod util; + +pub use matrix::BitMatrix; + +pub(crate) mod local_prelude { + pub use crate::row::BitSlice; + pub use crate::submatrix::{BitSubMatrix, BitSubMatrixMut}; + pub use crate::util::{FALSE, TRUE}; + pub use bit_vec::{BitBlock, BitVec}; +} diff --git a/matrix/src/matrix.rs b/matrix/src/matrix.rs new file mode 100644 index 0000000..8c8406d --- /dev/null +++ b/matrix/src/matrix.rs @@ -0,0 +1,255 @@ +//! Matrix of bits. + +use core::cmp; +use core::ops::{Index, IndexMut, RangeBounds}; + +use crate::local_prelude::*; +use crate::util::round_up_to_next; + +/// A matrix of bits. 
+#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr( + feature = "miniserde", + derive(miniserde::Serialize, miniserde::Deserialize) +)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct BitMatrix { + bit_vec: BitVec, + row_bits: usize, +} + +// Matrix + +impl BitMatrix { + /// Create a new BitMatrix with specific numbers of bits in columns and rows. + pub fn new(rows: usize, row_bits: usize) -> Self { + BitMatrix { + bit_vec: BitVec::from_elem_general(round_up_to_next(row_bits, B::bits()) * rows, false), + row_bits, + } + } + + /// Returns the number of rows. + #[inline] + fn num_rows(&self) -> usize { + if self.row_bits == 0 { + 0 + } else { + let row_blocks = round_up_to_next(self.row_bits, B::bits()) / B::bits(); + self.bit_vec.storage().len() / row_blocks + } + } + + /// Returns the number of columns. + #[inline] + pub fn num_cols(&self) -> usize { + self.row_bits + } + + /// Returns the matrix's size as `(rows, columns)`. + pub fn size(&self) -> (usize, usize) { + (self.num_rows(), self.row_bits) + } + + /// Sets the value of a bit. + /// + /// # Panics + /// + /// Panics if `(row, col)` is out of bounds. + #[inline] + pub fn set(&mut self, row: usize, col: usize, enabled: bool) { + let row_size_in_bits = round_up_to_next(self.row_bits, B::bits()); + self.bit_vec.set(row * row_size_in_bits + col, enabled); + } + + /// Sets the value of all bits. + #[inline] + pub fn fill(&mut self, enabled: bool) { + self.bit_vec.fill(enabled); + } + + /// Grows the matrix in-place, adding `num_rows` rows filled with `value`. + pub fn grow(&mut self, num_rows: usize, value: bool) { + self.bit_vec + .grow(round_up_to_next(self.row_bits, B::bits()) * num_rows, value); + } + + /// Truncates the matrix. + pub fn truncate(&mut self, num_rows: usize) { + self.bit_vec + .truncate(round_up_to_next(self.row_bits, B::bits()) * num_rows); + } + + /// Returns a slice of the matrix's rows. 
+ #[inline] + pub fn sub_matrix>(&self, range: R) -> BitSubMatrix<'_, B> { + let row_size = round_up_to_next(self.row_bits, B::bits()) / B::bits(); + BitSubMatrix { + slice: &self.bit_vec.storage()[( + range.start_bound().map(|&s| s * row_size), + range.end_bound().map(|&e| e * row_size), + )], + row_bits: self.row_bits, + } + } + + /// Returns a mutable slice of the matrix's rows. + #[inline] + pub fn sub_matrix_mut>(&mut self, range: R) -> BitSubMatrixMut<'_, B> { + let row_size = self.row_size(); + // Safety: + // NOTE(review): the storage is only reborrowed as a block sub-slice here; confirm that this upholds the contract of `storage_mut`. + unsafe { + BitSubMatrixMut { + slice: &mut self.bit_vec.storage_mut()[( + range.start_bound().map(|&s| s * row_size), + range.end_bound().map(|&e| e * row_size), + )], + row_bits: self.row_bits, + } + } + } + + fn row_size(&self) -> usize { + round_up_to_next(self.row_bits, B::bits()) / B::bits() + } + + /// Given a row's index, returns a slice of all rows above that row + /// and a slice of that row together with all rows below. + /// + /// Functionally equivalent to `(self.sub_matrix(0..row), + /// self.sub_matrix(row..self.num_rows()))`. + #[inline] + pub fn split_at(&self, row: usize) -> (BitSubMatrix<'_, B>, BitSubMatrix<'_, B>) { + ( + self.sub_matrix(0..row), + self.sub_matrix(row..self.num_rows()), + ) + } + + /// Given a row's index, returns a mutable slice of all rows above that row + /// and a mutable slice of that row together with all rows below. + #[inline] + pub fn split_at_mut(&mut self, row: usize) -> (BitSubMatrixMut<'_, B>, BitSubMatrixMut<'_, B>) { + let row_size = round_up_to_next(self.row_bits, B::bits()) / B::bits(); + let (first, second) = unsafe { self.bit_vec.storage_mut().split_at_mut(row * row_size) }; + ( + BitSubMatrixMut::new(first, self.row_bits), + BitSubMatrixMut::new(second, self.row_bits), + ) + } + + /// Iterate over bits in the specified row. 
+ pub fn iter_row(&self, row: usize) -> impl Iterator + '_ { + BitSlice::new(&self[row].slice).iter_bits(self.row_bits) + } + + /// Computes the transitive closure of the binary relation + /// represented by this square bit matrix. + /// + /// Modifies this matrix in place using Warshall's algorithm. + /// + /// After this operation, the matrix will describe a transitive + /// relation. This means that, for any indices `a`, `b`, `c`, + /// if `M[(a, b)]` and `M[(b, c)]`, then `M[(a, c)]`. + /// + /// # Complexity + /// + /// The time complexity is **O(n^3)**, where `n` is the number + /// of columns and rows. + /// + /// # Panics + /// + /// The matrix must be square for this operation to succeed. + pub fn transitive_closure(&mut self) { + Into::>::into(self).transitive_closure(); + } + + /// Determines whether the number of rows equals the number of columns. + /// + /// This means the matrix is square. + pub fn is_square(&self) -> bool { + self.num_rows() == self.row_bits + } + + /// Determines whether the matrix is empty, i.e. whether its size is `(0, 0)`. + pub fn is_empty(&self) -> bool { + self.size() == (0, 0) + } + + /// Computes the reflexive closure of the binary relation represented by + /// this bit matrix. The matrix can be rectangular. + /// + /// The reflexive closure means that for every `x` that will be within bounds, + /// `M[(x, x)]` is true. + /// + /// In other words, modifies this matrix in-place by making all + /// bits on the diagonal set. + pub fn reflexive_closure(&mut self) { + for i in 0..cmp::min(self.row_bits, self.num_rows()) { + self.set(i, i, true); + } + } +} + +/// Gains immutable access to the matrix's row in the form of a `BitSlice`. 
+impl Index for BitMatrix { + type Output = BitSlice; + + #[inline] + fn index(&self, row: usize) -> &Self::Output { + let row_size = round_up_to_next(self.row_bits, B::bits()) / B::bits(); + BitSlice::new(&self.bit_vec.storage()[row * row_size..(row + 1) * row_size]) + } +} + +/// Gains mutable access to the matrix's row in the form of a `BitSlice`. +impl IndexMut for BitMatrix { + #[inline] + fn index_mut(&mut self, row: usize) -> &mut Self::Output { + let row_size = round_up_to_next(self.row_bits, B::bits()) / B::bits(); + // Safety: + // This does not introduce any memory unsafety despite the `unsafe` keyword. + unsafe { + BitSlice::new_mut(&mut self.bit_vec.storage_mut()[row * row_size..(row + 1) * row_size]) + } + } +} + +/// Returns `true` if a bit is enabled in the matrix, or `false` otherwise. +/// +/// The first index in the tuple is row number, and the second is column +/// number. +impl Index<(usize, usize)> for BitMatrix { + type Output = bool; + + #[inline] + fn index(&self, (row, col): (usize, usize)) -> &bool { + let row_size_in_bits = round_up_to_next(self.row_bits, B::bits()); + if self.bit_vec.get(row * row_size_in_bits + col).unwrap() { + &TRUE + } else { + &FALSE + } + } +} + +impl<'a, B: BitBlock> From<&'a mut BitMatrix> for BitSubMatrixMut<'a, B> { + fn from(value: &'a mut BitMatrix) -> Self { + unsafe { BitSubMatrixMut::new(value.bit_vec.storage_mut(), value.row_bits) } + } +} + +// Tests + +#[test] +fn test_empty() { + let mut matrix = ::new(0, 0); + for _ in 0..3 { + assert_eq!(matrix.num_rows(), 0); + assert_eq!(matrix.size(), (0, 0)); + assert!(matrix.is_square()); + assert!(matrix.is_empty()); + matrix.transitive_closure(); + } +} diff --git a/matrix/src/row.rs b/matrix/src/row.rs new file mode 100644 index 0000000..740e3b7 --- /dev/null +++ b/matrix/src/row.rs @@ -0,0 +1,102 @@ +//! Implements access to a matrix's individual rows. 
+ +use core::{mem, ops}; + +use bit_vec::BitBlock; + +use crate::local_prelude::*; +use crate::util::div_rem; + +/// A slice of a bit vector's blocks. +pub struct BitSlice { + pub(crate) slice: [Block], +} + +impl BitSlice { + /// Creates a new slice from a slice of blocks. + #[inline] + pub fn new(slice: &[Block]) -> &Self { + // Safety: + // This is the only way to construct a custom DST. + // NOTE(review): assumes `BitSlice` has the same layout as `[Block]`; consider `#[repr(transparent)]` on the struct. + unsafe { mem::transmute(slice) } + } + + /// Creates a new slice from a mutable slice of blocks. + #[inline] + pub fn new_mut(slice: &mut [Block]) -> &mut Self { + // Safety: + // This is the only way to construct a custom DST. + // NOTE(review): assumes `BitSlice` has the same layout as `[Block]`; consider `#[repr(transparent)]` on the struct. + unsafe { mem::transmute(slice) } + } + + /// Iterates over the first `len` bits; bits past the end of the slice read as `false`. + #[inline] + pub fn iter_bits(&self, len: usize) -> impl Iterator + '_ { + (0..len).map(|i| self[i]) + } + + /// Iterates over the slice's blocks. + pub fn iter_blocks(&self) -> impl Iterator { + self.slice.iter() + } + + /// Iterates over the slice's blocks, yielding mutable references. + pub fn iter_blocks_mut(&mut self) -> impl Iterator { + self.slice.iter_mut() + } + + /// Returns `true` if a bit is enabled in the bit vector slice, or `false` otherwise (including when `bit` lies past the end of the slice). + #[inline] + pub fn get(&self, bit: usize) -> bool { + let (block, i) = div_rem(bit, Block::bits()); + match self.slice.get(block) { + None => false, + Some(&b) => (b & (Block::one() << i)) != Block::zero(), + } + } + + /// Returns up to `len` bits starting at `bit`, read from the single block that contains `bit`; zero if that block is past the end of the slice. + #[inline] + pub fn small_slice_aligned(&self, bit: usize, len: u8) -> Block { + let (block, i) = div_rem(bit, Block::bits()); + match self.slice.get(block) { + None => Block::zero(), + Some(&b) => { + let len_mask = (Block::one() << len as usize) - Block::one(); + (b >> i) & len_mask + } + } + } +} + +/// Returns `true` if a bit is enabled in the bit vector slice, +/// or `false` otherwise. 
+impl ops::Index for BitSlice { + type Output = bool; + + #[inline] + fn index(&self, bit: usize) -> &bool { + let (block, i) = div_rem(bit, Block::bits()); + match self.slice.get(block) { + None => &FALSE, + Some(&b) => { + if (b & (Block::one() << i)) != Block::zero() { + &TRUE + } else { + &FALSE + } + } + } + } +} + +impl ops::BitOrAssign for &mut BitSlice { + fn bitor_assign(&mut self, rhs: Self) { + debug_assert_eq!(self.slice.len(), rhs.slice.len()); + for (dst, src) in self.iter_blocks_mut().zip(rhs.iter_blocks()) { + *dst |= *src; + } + } +} diff --git a/matrix/src/submatrix.rs b/matrix/src/submatrix.rs new file mode 100644 index 0000000..3cedd5f --- /dev/null +++ b/matrix/src/submatrix.rs @@ -0,0 +1,298 @@ +//! Submatrix of bits. + +use core::cmp; +use core::fmt; +use core::mem; +use core::ops::RangeBounds; +use core::ops::{Index, IndexMut}; +use core::slice; + +use crate::local_prelude::*; +use crate::util::{div_rem, round_up_to_next}; + +/// Immutable access to a range of matrix's rows. +pub struct BitSubMatrix<'a, B: BitBlock> { + pub(crate) slice: &'a [B], + pub(crate) row_bits: usize, +} + +/// Mutable access to a range of matrix's rows. +pub struct BitSubMatrixMut<'a, B: BitBlock> { + pub(crate) slice: &'a mut [B], + pub(crate) row_bits: usize, +} + +impl<'a, B: BitBlock> BitSubMatrix<'a, B> { + // /// Returns a new BitSubMatrix. + // pub(crate) fn new(slice: &[B], row_bits: usize) -> BitSubMatrix<'_, B> { + // BitSubMatrix { slice, row_bits } + // } + + /// Forms a BitSubMatrix from a pointer and dimensions. + /// + /// # Safety + /// + /// Can construct an ill-formed value, thus the function is marked as + /// unsafe. + #[inline] + pub unsafe fn from_raw_parts(ptr: *const B, rows: usize, row_bits: usize) -> Self { + BitSubMatrix { + slice: slice::from_raw_parts( + ptr, + round_up_to_next(row_bits, B::bits()) / B::bits() * rows, + ), + row_bits, + } + } + + /// Iterates over the matrix's rows in the form of immutable slices. 
+ pub fn iter(&self) -> impl Iterator> { + fn f(arg: &[B]) -> &BitSlice { + // Safety: + // This is currently the only way to construct a custom DST. + // We wish the layout of DSTs were defined. + unsafe { mem::transmute(arg) } + } + let row_size = round_up_to_next(self.row_bits, B::bits()) / B::bits(); + self.slice.chunks(row_size).map(f::) + } + + fn row_size(&self) -> usize { + round_up_to_next(self.row_bits, B::bits()) / B::bits() + } +} + +impl<'a, B: BitBlock> BitSubMatrixMut<'a, B> { + /// Returns a new `BitSubMatrixMut`. + pub(crate) fn new(slice: &mut [B], row_bits: usize) -> BitSubMatrixMut<'_, B> { + BitSubMatrixMut { slice, row_bits } + } + + /// Forms a `BitSubMatrixMut` from a pointer and dimensions. + /// + /// # Safety + /// + /// Can construct an ill-formed value, thus the function is unsafe. + #[inline] + pub unsafe fn from_raw_parts(ptr: *mut B, rows: usize, row_bits: usize) -> Self { + BitSubMatrixMut { + slice: slice::from_raw_parts_mut( + ptr, + round_up_to_next(row_bits, B::bits()) / B::bits() * rows, + ), + row_bits, + } + } + + /// Returns the number of rows. + #[inline] + fn num_rows(&self) -> usize { + self.slice.len().checked_div(self.row_size()).unwrap_or(0) + } + + /// Returns the number of columns. + #[inline] + pub fn num_cols(&self) -> usize { + self.row_bits + } + + /// Sets the value of a bit. The first argument is the row number. + /// + /// # Panics + /// + /// Panics if `(row, col)` is out of bounds. + #[inline] + pub fn set(&mut self, row: usize, col: usize, enabled: bool) { + let row_size_in_bits = round_up_to_next(self.row_bits, B::bits()); + let bit = row * row_size_in_bits + col; + let (block, i) = div_rem(bit, B::bits()); + assert!( + block < self.slice.len() && col < self.row_bits, + "invalid index given to `BitSubMatrixMut::set`" + ); + unsafe { + // Safety: + // We check for `block` being within bounds in the assert above.
+ let elt = self.slice.get_unchecked_mut(block); + if enabled { + *elt |= B::one() << i; + } else { + *elt = *elt & !(B::one() << i); + } + } + } + + /// Sets the value of a bit. The first argument is the row number. + /// + /// # Safety + /// + /// Unsafe if `(row, col)` is out of bounds. + #[inline] + pub unsafe fn set_unchecked(&mut self, row: usize, col: usize, enabled: bool) { + let row_size_in_bits = round_up_to_next(self.row_bits, B::bits()); + let bit = row * row_size_in_bits + col; + let (block, i) = div_rem(bit, B::bits()); + unsafe { + // Safety: + // Unsafe if `(row, col)` is out of bounds. + let elt = self.slice.get_unchecked_mut(block); + if enabled { + *elt |= B::one() << i; + } else { + *elt = *elt & !(B::one() << i); + } + } + } + + /// Returns a slice of the matrix's rows. + pub fn sub_matrix>(&self, range: R) -> BitSubMatrix<'_, B> { + BitSubMatrix { + slice: &self.slice[( + range.start_bound().map(|&s| s * self.row_size()), + range.end_bound().map(|&e| e * self.row_size()), + )], + row_bits: self.row_bits, + } + } + + /// Given a row's index, returns a slice of all rows above that row + /// and a slice of that row together with all rows below. + /// + /// Functionally equivalent to `(self.sub_matrix(0..row), + /// self.sub_matrix(row..self.num_rows()))`. + #[inline] + pub fn split_at(&self, row: usize) -> (BitSubMatrix<'_, B>, BitSubMatrix<'_, B>) { + ( + self.sub_matrix(0..row), + self.sub_matrix(row..self.num_rows()), + ) + } + + /// Given a row's index, returns a mutable slice of all rows above that row + /// and a mutable slice of that row together with all rows below.
+ #[inline] + pub fn split_at_mut(&mut self, row: usize) -> (BitSubMatrixMut<'_, B>, BitSubMatrixMut<'_, B>) { + let (first, second) = self.slice.split_at_mut(row * self.row_size()); + ( + BitSubMatrixMut::new(first, self.row_bits), + BitSubMatrixMut::new(second, self.row_bits), + ) + } + + /// Computes the transitive closure of the binary relation + /// represented by this square bit matrix. + /// + /// Modifies this matrix in place using Warshall's algorithm. + /// + /// After this operation, the matrix will describe a transitive + /// relation. This means that, for any indices `a`, `b`, `c`, + /// if `M[(a, b)]` and `M[(b, c)]`, then `M[(a, c)]`. + /// + /// # Complexity + /// + /// The time complexity is **O(n^3)**, where `n` is the number + /// of columns and rows. + /// + /// # Panics + /// + /// Panics if the matrix is not square. + pub fn transitive_closure(&mut self) { + assert!(self.is_square()); + for pos in 0..self.row_bits { + let (mut rows0, mut rows1a) = self.split_at_mut(pos); + let (mut row, mut rows1b) = rows1a.split_at_mut(1); + for mut dst_row in rows0.iter_mut().chain(rows1b.iter_mut()) { + if dst_row[pos] { + dst_row |= &mut row[0]; + } + } + } + } + + /// Determines whether the number of rows equals the number of columns. + /// + /// This means the matrix is square. + fn is_square(&self) -> bool { + self.num_rows() == self.row_bits + } + + /// Computes the reflexive closure of the binary relation represented by + /// this bit matrix. The matrix can be rectangular. + /// + /// The reflexive closure means that for every `x` that will be within bounds, + /// `M[(x, x)]` is true. + /// + /// In other words, modifies this matrix in-place by making all + /// bits on the diagonal set. + pub fn reflexive_closure(&mut self) { + for i in 0..cmp::min(self.row_bits, self.num_rows()) { + self.set(i, i, true); + } + } + + /// Iterates over the matrix's rows in the form of mutable slices.
+ pub fn iter_mut(&mut self) -> impl Iterator> { + fn f(arg: &mut [B]) -> &mut BitSlice { + // Safety: + // This is currently the only way to construct a custom DST. + // We wish the layout of DSTs were defined. + unsafe { mem::transmute(arg) } + } + self.slice.chunks_mut(self.row_size()).map(f::) + } + + fn row_size(&self) -> usize { + round_up_to_next(self.row_bits, B::bits()) / B::bits() + } +} + +/// Returns the matrix's row in the form of an immutable slice. +impl<'a, B: BitBlock> Index for BitSubMatrixMut<'a, B> { + type Output = BitSlice; + + #[inline] + fn index(&self, row: usize) -> &Self::Output { + // Safety: + // This is currently the only way to construct a custom DST. + // We wish the layout of DSTs were defined. + unsafe { mem::transmute(&self.slice[row * self.row_size()..(row + 1) * self.row_size()]) } + } +} + +/// Returns the matrix's row in the form of a mutable slice. +impl<'a, B: BitBlock> IndexMut for BitSubMatrixMut<'a, B> { + #[inline] + fn index_mut(&mut self, row: usize) -> &mut Self::Output { + let row_size = self.row_size(); + // Safety: + // This is currently the only way to construct a custom DST. + // We wish the layout of DSTs were defined. + unsafe { mem::transmute(&mut self.slice[row * row_size..(row + 1) * row_size]) } + } +} + +/// Returns the matrix's row in the form of an immutable slice. +impl<'a, B: BitBlock> Index for BitSubMatrix<'a, B> { + type Output = BitSlice; + + #[inline] + fn index(&self, row: usize) -> &Self::Output { + let row_size = self.row_size(); + // Safety: + // This is currently the only way to construct a custom DST. + // We wish the layout of DSTs were defined.
+ unsafe { mem::transmute(&self.slice[row * row_size..(row + 1) * row_size]) } + } +} + +impl<'a, B: BitBlock> fmt::Debug for BitSubMatrix<'a, B> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for row in self.iter() { + for bit in row.iter_bits(self.row_bits) { + write!(fmt, "{}", if bit { 1 } else { 0 })?; + } + writeln!(fmt)?; + } + Ok(()) + } +} diff --git a/matrix/src/util.rs b/matrix/src/util.rs new file mode 100644 index 0000000..3bca17e --- /dev/null +++ b/matrix/src/util.rs @@ -0,0 +1,17 @@ +//! Arithmetic functions. + +#[inline] +pub fn div_rem(num: usize, divisor: usize) -> (usize, usize) { + (num / divisor, num % divisor) +} + +#[inline] +pub fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize { + assert!(target_alignment.is_power_of_two()); + (unrounded + target_alignment - 1) & !(target_alignment - 1) +} + +/// A value for borrowing through the `Index` trait. +pub static TRUE: bool = true; +/// A value for borrowing through the `Index` trait. 
+pub static FALSE: bool = false; diff --git a/matrix/tests/serialize_deserialize.rs b/matrix/tests/serialize_deserialize.rs new file mode 100644 index 0000000..fc78256 --- /dev/null +++ b/matrix/tests/serialize_deserialize.rs @@ -0,0 +1,53 @@ +#[cfg(feature = "serde")] +#[test] +fn test_serialize_deserialize_serde() { + use bit_matrix::BitMatrix; + + let mut expected_matrix = BitMatrix::new(4, 4); + let points = &[ + (0, 0), + (0, 1), + (0, 3), + (1, 0), + (1, 2), + (2, 0), + (2, 1), + (3, 1), + (3, 3), + ]; + for &(i, j) in points { + expected_matrix.set(i, j, true); + } + + let serialized = serde_json::to_string(&expected_matrix).unwrap(); + let matrix: BitMatrix = serde_json::from_str(serialized.as_str()).unwrap(); + + assert_eq!(matrix, expected_matrix); +} + +#[cfg(feature = "miniserde")] +#[test] +fn test_serialize_deserialize_miniserde() { + use bit_matrix::BitMatrix; + + let mut expected_matrix = BitMatrix::new(4, 4); + let points = &[ + (0, 0), + (0, 1), + (0, 3), + (1, 0), + (1, 2), + (2, 0), + (2, 1), + (3, 1), + (3, 3), + ]; + for &(i, j) in points { + expected_matrix.set(i, j, true); + } + + let serialized = miniserde::json::to_string(&expected_matrix); + let matrix: BitMatrix = miniserde::json::from_str(serialized.as_str()).unwrap(); + + assert_eq!(matrix, expected_matrix); +} diff --git a/matrix/tests/test_submatrix.rs b/matrix/tests/test_submatrix.rs new file mode 100644 index 0000000..c6aaa94 --- /dev/null +++ b/matrix/tests/test_submatrix.rs @@ -0,0 +1,26 @@ +use bit_matrix::BitMatrix; + +#[test] +fn test_submatrix() { + let mut matrix = ::new(5, 4); + let points = &[ + (0, 0), + (0, 1), + (0, 3), + (1, 0), + (1, 2), + (2, 0), + (2, 1), + (3, 1), + (3, 3), + (4, 3), + ]; + for &(i, j) in points { + matrix.set(i, j, true); + } + let submatrix = matrix.sub_matrix(1..=3); + let mut iter = submatrix.iter(); + assert!(iter.next().unwrap().get(0)); + assert!(!iter.next().unwrap().get(2)); + assert_eq!(iter.next().unwrap().small_slice_aligned(1, 3), 
0b101); +} diff --git a/matrix/tests/transitive_closure.rs b/matrix/tests/transitive_closure.rs new file mode 100644 index 0000000..46f3b79 --- /dev/null +++ b/matrix/tests/transitive_closure.rs @@ -0,0 +1,30 @@ +use bit_matrix::BitMatrix; + +#[test] +fn test_transitive_closure() { + let mut matrix = ::new(4, 4); + let points = &[ + (0, 0), + (0, 1), + (0, 3), + (1, 0), + (1, 2), + (2, 0), + (2, 1), + (3, 1), + (3, 3), + ]; + for &(i, j) in points { + matrix.set(i, j, true); + } + matrix.transitive_closure(); + + let mut expected_matrix = BitMatrix::new(4, 4); + for i in 0..4 { + for j in 0..4 { + expected_matrix.set(i, j, true); + } + } + + assert_eq!(matrix, expected_matrix); +} diff --git a/set/Cargo.toml b/set/Cargo.toml new file mode 100644 index 0000000..29f1bd0 --- /dev/null +++ b/set/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "bit-set" +version.workspace = true +rust-version.workspace = true +authors = ["Alexis Beingessner "] +license = "Apache-2.0 OR MIT" +description = "A set of bits" +repository = "https://github.com/contain-rs/bit-set" +homepage = "https://github.com/contain-rs/bit-set" +documentation = "https://docs.rs/bit-set/" +keywords = ["data-structures", "bitset"] +readme = "README.md" +edition = "2021" + +[dependencies] +borsh = { version = "1.6.0", default-features = false, features = ["derive"], optional = true } +serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } +miniserde = { version = "0.1", optional = true } + +[dependencies.bit-vec] +path = "../vec/" +default-features = false + +[dev-dependencies] +rand = "0.10" +serde_json = "1.0" + +[features] +default = ["std"] +std = ["bit-vec/std"] + +borsh = ["dep:borsh", "bit-vec/borsh"] +serde = ["dep:serde", "bit-vec/serde"] +miniserde = ["dep:miniserde", "bit-vec/miniserde"] + +[package.metadata.docs.rs] +features = ["borsh", "serde", "miniserde"] diff --git a/set/LICENSE-APACHE b/set/LICENSE-APACHE new file mode 120000 index 
0000000..965b606 --- /dev/null +++ b/set/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/set/LICENSE-MIT b/set/LICENSE-MIT new file mode 120000 index 0000000..76219eb --- /dev/null +++ b/set/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/set/README.md b/set/README.md new file mode 100644 index 0000000..a9433c4 --- /dev/null +++ b/set/README.md @@ -0,0 +1,128 @@ +
+

bit-set

+

+ A compact set of bits. +

+

+ +[![crates.io][crates.io shield]][crates.io link] +[![Documentation][docs.rs badge]][docs.rs link] +![Rust CI][github ci badge] +![rustc 1.82+] +
+
+[![Dependency Status][deps.rs status]][deps.rs link] +[![Download Status][shields.io download count]][crates.io link] + +

+
+ +[crates.io shield]: https://img.shields.io/crates/v/bit-set?label=latest +[crates.io link]: https://crates.io/crates/bit-set +[docs.rs badge]: https://docs.rs/bit-set/badge.svg?version=0.10.0 +[docs.rs link]: https://docs.rs/bit-set/0.10.0/bit_set/ +[github ci badge]: https://github.com/contain-rs/bit-set/workflows/Rust/badge.svg?branch=master +[rustc 1.82+]: https://img.shields.io/badge/rustc-1.82%2B-blue.svg +[deps.rs status]: https://deps.rs/crate/bit-set/0.10.0/status.svg +[deps.rs link]: https://deps.rs/crate/bit-set/0.10.0 +[shields.io download count]: https://img.shields.io/crates/d/bit-set.svg + +## Usage + +Add this to your Cargo.toml: + +```toml +[dependencies] +bit-set = "0.10" +``` + +Since Rust 2018, `extern crate` is no longer mandatory. If your edition is old (Rust 2015), +add this to your crate root: + +```rust +extern crate bit_set; +``` + +If you want to use `serde`, enable it with the `serde` feature: + +```toml +[dependencies] +bit-set = { version = "0.10", features = ["serde"] } +``` + +If you want to use bit-set in a program that has `#![no_std]`, just drop default features: + +```toml +[dependencies] +bit-set = { version = "0.10", default-features = false } +``` + +If you want to use serde with the alloc crate instead of std, use this: + +```toml +[dependencies] +bit-set = { version = "0.10", default-features = false, features = ["serde"] } +``` + +If you want [borsh-rs](https://github.com/near/borsh-rs) support, include it like this: + +```toml +[dependencies] +bit-set = { version = "0.10", features = ["borsh"] } +``` + +Another available serialization library can be enabled with the +[`miniserde`](https://github.com/dtolnay/miniserde) feature. + + + + +### Description + +An implementation of a set using a bit vector as an underlying +representation for holding unsigned numerical elements. 
+ +It should also be noted that the amount of storage necessary for holding a +set of objects is proportional to the maximum of the objects when viewed +as a `usize`. + +### Examples + +```rust +use bit_set::BitSet; + +// It's a regular set +let mut s = BitSet::new(); +s.insert(0); +s.insert(3); +s.insert(7); + +s.remove(7); + +if !s.contains(7) { + println!("There is no 7"); +} + +// Can initialize from a `BitVec` +let other = BitSet::from_bytes(&[0b11010000]); + +s.union_with(&other); + +// Print 0, 1, 3 in some order +for x in s.iter() { + println!("{}", x); +} + +// Can convert back to a `BitVec` +let bv = s.into_bit_vec(); +assert!(bv[3]); +``` + + + +## License + +Dual-licensed for compatibility with the Rust project. + +Licensed under the Apache License Version 2.0: http://www.apache.org/licenses/LICENSE-2.0, +or the MIT license: http://opensource.org/licenses/MIT, at your option. diff --git a/set/RELEASES.md b/set/RELEASES.md new file mode 100644 index 0000000..642b954 --- /dev/null +++ b/set/RELEASES.md @@ -0,0 +1,34 @@ +Version 0.10.0 (not yet released) +======================================================== + + + +- removed nanoserde support +- moved the crate into a workspace + +Version 0.9.0 +======================================================== + + + +- Minimal Supported Rust Version is 1.82 +- Rust edition 2021 is used +- implemented `fn make_empty` +- implemented `fn reset` +- added general initialization functions: `fn new_general`, `fn from_bit_vec_general`, `fn with_capacity_general`, `fn from_bytes_general` + +Version 0.8.0 +======================================================== + + + +Version 0.7.0 (ZERO BREAKING CHANGES) +======================================================== + + + +- `serde::Serialize`, `Deserialize` is derived under the `serde` optional feature +- `impl Display` is implemented +- `impl Debug` has different output (we do not promise stable `Debug` output) +- `fn truncate` is implemented +- `fn get_mut` is implemented 
diff --git a/set/benches/bench.rs b/set/benches/bench.rs new file mode 100644 index 0000000..b7152b5 --- /dev/null +++ b/set/benches/bench.rs @@ -0,0 +1,58 @@ +// Copyright 2012-2024 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(test)] + +extern crate test; + +use bit_set::BitSet; +use bit_vec::BitVec; + +use rand::Rng; +use test::{black_box, Bencher}; + +const BENCH_BITS: usize = 1 << 14; +const BITS: usize = 32; + +#[bench] +fn bench_bit_vecset_small(b: &mut Bencher) { + let mut r = rand::rng(); + let mut bit_vec = BitSet::new(); + b.iter(|| { + for _ in 0..100 { + bit_vec.insert((r.next_u32() as usize) % BITS); + } + black_box(&bit_vec); + }); +} + +#[bench] +fn bench_bit_vecset_big(b: &mut Bencher) { + let mut r = rand::rng(); + let mut bit_vec = BitSet::new(); + b.iter(|| { + for _ in 0..100 { + bit_vec.insert((r.next_u32() as usize) % BENCH_BITS); + } + black_box(&bit_vec); + }); +} + +#[bench] +fn bench_bit_vecset_iter(b: &mut Bencher) { + let bit_vec = BitSet::from_bit_vec(BitVec::from_fn(BENCH_BITS, |idx| idx % 3 == 0)); + b.iter(|| { + let mut sum = 0; + for idx in &bit_vec { + sum += idx; + } + sum + }) +} diff --git a/set/src/iter.rs b/set/src/iter.rs new file mode 100644 index 0000000..e43722e --- /dev/null +++ b/set/src/iter.rs @@ -0,0 +1,376 @@ +use crate::{local_prelude::*, BitSet}; + +#[derive(Clone)] +struct BlockIter { + head: B, + head_offset: usize, + tail: T, +} + +impl BlockIter +where + T: Iterator, +{ + fn from_blocks(mut blocks: T) -> Self { + let h = blocks.next().unwrap_or(B::zero()); + BlockIter { + tail: blocks, + head: h, + head_offset: 0, + } + } +} + +#[allow(clippy::multiple_inherent_impl)] +impl BitSet { + /// Iterator over each 
usize stored in `self` union `other`. + /// See [`union_with`] for an efficient in-place version. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = BitSet::from_bytes(&[0b01101000]); + /// let b = BitSet::from_bytes(&[0b10100000]); + /// + /// // Print 0, 1, 2, 4 in arbitrary order + /// for x in a.union(&b) { + /// println!("{}", x); + /// } + /// ``` + /// + /// [`union_with`]: Self::union_with + #[inline] + pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, B> { + fn or(w1: B, w2: B) -> B { + w1 | w2 + } + + Union(BlockIter::from_blocks(TwoBitPositions { + set: self.bit_vec.blocks(), + other: other.bit_vec.blocks(), + merge: or, + })) + } + + /// Iterator over each usize stored in `self` intersect `other`. + /// See [`intersect_with`] for an efficient in-place version. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = BitSet::from_bytes(&[0b01101000]); + /// let b = BitSet::from_bytes(&[0b10100000]); + /// + /// // Print 2 + /// for x in a.intersection(&b) { + /// println!("{}", x); + /// } + /// ``` + /// + /// [`intersect_with`]: Self::intersect_with + #[inline] + pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, B> { + fn bitand(w1: B, w2: B) -> B { + w1 & w2 + } + let min = cmp::min(self.bit_vec.len(), other.bit_vec.len()); + + Intersection { + iter: BlockIter::from_blocks(TwoBitPositions { + set: self.bit_vec.blocks(), + other: other.bit_vec.blocks(), + merge: bitand, + }), + n: min, + } + } + + /// Iterator over each usize stored in the `self` setminus `other`. + /// See [`difference_with`] for an efficient in-place version. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = BitSet::from_bytes(&[0b01101000]); + /// let b = BitSet::from_bytes(&[0b10100000]); + /// + /// // Print 1, 4 in arbitrary order + /// for x in a.difference(&b) { + /// println!("{}", x); + /// } + /// + /// // Note that difference is not symmetric, + /// // and `b - a` means something else. + /// // This prints 0 + /// for x in b.difference(&a) { + /// println!("{}", x); + /// } + /// ``` + /// + /// [`difference_with`]: Self::difference_with + #[inline] + pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, B> { + fn diff(w1: B, w2: B) -> B { + w1 & !w2 + } + + Difference(BlockIter::from_blocks(TwoBitPositions { + set: self.bit_vec.blocks(), + other: other.bit_vec.blocks(), + merge: diff, + })) + } + + /// Iterator over each usize stored in the symmetric difference of `self` and `other`. + /// See [`symmetric_difference_with`] for an efficient in-place version. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = BitSet::from_bytes(&[0b01101000]); + /// let b = BitSet::from_bytes(&[0b10100000]); + /// + /// // Print 0, 1, 4 in arbitrary order + /// for x in a.symmetric_difference(&b) { + /// println!("{}", x); + /// } + /// ``` + /// + /// [`symmetric_difference_with`]: Self::symmetric_difference_with + #[inline] + pub fn symmetric_difference<'a>(&'a self, other: &'a Self) -> SymmetricDifference<'a, B> { + fn bitxor(w1: B, w2: B) -> B { + w1 ^ w2 + } + + SymmetricDifference(BlockIter::from_blocks(TwoBitPositions { + set: self.bit_vec.blocks(), + other: other.bit_vec.blocks(), + merge: bitxor, + })) + } + + /// Iterator over each usize stored in the `BitSet`. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let s = BitSet::from_bytes(&[0b01001010]); + /// + /// // Print 1, 4, 6 in arbitrary order + /// for x in s.iter() { + /// println!("{}", x); + /// } + /// ``` + #[inline] + pub fn iter(&self) -> Iter<'_, B> { + Iter(BlockIter::from_blocks(self.bit_vec.blocks())) + } +} + +/// An iterator combining two `BitSet` iterators. +#[derive(Clone)] +struct TwoBitPositions<'a, B: 'a + BitBlock> { + set: Blocks<'a, B>, + other: Blocks<'a, B>, + merge: fn(B, B) -> B, +} + +/// An iterator for `BitSet`. +#[derive(Clone)] +pub struct Iter<'a, B: 'a + BitBlock>(BlockIter, B>); +#[derive(Clone)] +pub struct Union<'a, B: 'a + BitBlock>(BlockIter, B>); +#[derive(Clone)] +pub struct Intersection<'a, B: 'a + BitBlock> { + iter: BlockIter, B>, + // as an optimization, we compute the maximum possible + // number of elements in the intersection, and count it + // down as we return elements. If we reach zero, we can + // stop. + n: usize, +} +#[derive(Clone)] +pub struct Difference<'a, B: 'a + BitBlock>(BlockIter, B>); +#[derive(Clone)] +pub struct SymmetricDifference<'a, B: 'a + BitBlock>(BlockIter, B>); + +impl Iterator for BlockIter +where + T: Iterator, +{ + type Item = usize; + + fn next(&mut self) -> Option { + while self.head == B::zero() { + match self.tail.next() { + Some(w) => self.head = w, + None => return None, + } + self.head_offset += B::bits(); + } + + // from the current block, isolate the + // LSB and subtract 1, producing k: + // a block with a number of set bits + // equal to the index of the LSB + let k = (self.head & (!self.head + B::one())) - B::one(); + // update block, removing the LSB + self.head = self.head & (self.head - B::one()); + // return offset + (index of LSB) + Some(self.head_offset + B::count_ones(k)) + } + + fn count(self) -> usize { + self.head.count_ones() + self.tail.map(|block| block.count_ones()).sum::() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + 
match self.tail.size_hint() { + (_, Some(h)) => (0, Some((1 + h) * B::bits())), + _ => (0, None), + } + } +} + +impl Iterator for TwoBitPositions<'_, B> { + type Item = B; + + fn next(&mut self) -> Option { + match (self.set.next(), self.other.next()) { + (Some(a), Some(b)) => Some((self.merge)(a, b)), + (Some(a), None) => Some((self.merge)(a, B::zero())), + (None, Some(b)) => Some((self.merge)(B::zero(), b)), + _ => None, + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (first_lower_bound, first_upper_bound) = self.set.size_hint(); + let (second_lower_bound, second_upper_bound) = self.other.size_hint(); + + let upper_bound = first_upper_bound.zip(second_upper_bound); + + let get_max = |(a, b)| cmp::max(a, b); + ( + cmp::max(first_lower_bound, second_lower_bound), + upper_bound.map(get_max), + ) + } +} + +impl Iterator for Iter<'_, B> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl Iterator for Union<'_, B> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl Iterator for Intersection<'_, B> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.n != 0 { + self.n -= 1; + self.iter.next() + } else { + None + } + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + // We could invoke self.iter.size_hint() and incorporate that into the hint. + // In practice, that does not seem worthwhile because the lower bound will + // always be zero and the upper bound could only possibly be less than n in a + // partially iterated iterator.
However, it makes little sense to ask for size_hint + // in a partially iterated iterator, so it did not seem worthwhile. + (0, Some(self.n)) + } + #[inline] + fn count(self) -> usize { + self.iter.count() + } +} + +impl Iterator for Difference<'_, B> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl Iterator for SymmetricDifference<'_, B> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'a, B: BitBlock> IntoIterator for &'a BitSet { + type Item = usize; + type IntoIter = Iter<'a, B>; + + fn into_iter(self) -> Iter<'a, B> { + self.iter() + } +} diff --git a/set/src/lib.rs b/set/src/lib.rs new file mode 100644 index 0000000..f7f268b --- /dev/null +++ b/set/src/lib.rs @@ -0,0 +1,76 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! # Description +//! +//! An implementation of a set using a bit vector as an underlying +//! representation for holding unsigned numerical elements. +//! +//! It should also be noted that the amount of storage necessary for holding a +//! set of objects is proportional to the maximum of the objects when viewed +//! as a `usize`. +//! +//! # Examples +//! +//! ``` +//! use bit_set::BitSet; +//! +//! // It's a regular set +//! let mut s = BitSet::new(); +//! s.insert(0); +//! s.insert(3); +//! s.insert(7); +//! +//! s.remove(7); +//! +//!
if !s.contains(7) { +//! println!("There is no 7"); +//! } +//! +//! // Can initialize from a `BitVec` +//! let other = BitSet::from_bytes(&[0b11010000]); +//! +//! s.union_with(&other); +//! +//! // Print 0, 1, 3 in some order +//! for x in s.iter() { +//! println!("{}", x); +//! } +//! +//! // Can convert back to a `BitVec` +//! let bv = s.into_bit_vec(); +//! assert!(bv[3]); +//! ``` +#![doc(html_root_url = "https://docs.rs/bit-set/0.8.0")] +#![no_std] +#![deny(clippy::shadow_reuse)] +#![deny(clippy::shadow_same)] +#![deny(clippy::shadow_unrelated)] +#![warn(clippy::multiple_inherent_impl)] +#![warn(clippy::multiple_crate_versions)] +#![warn(clippy::single_match)] +#![warn(clippy::missing_safety_doc)] + +#[cfg(any(test, feature = "std"))] +extern crate std; + +mod iter; +mod set; +mod util; + +pub(crate) mod local_prelude { + pub use bit_vec::{BitBlock, BitVec, Blocks}; + pub use core::cmp::Ordering; + pub use core::iter::{self, Chain, Enumerate, FromIterator, Repeat, Skip, Take}; + pub use core::{cmp, fmt, hash}; +} + +pub use bit_vec::BitBlock; +pub use set::BitSet; diff --git a/set/src/set.rs b/set/src/set.rs new file mode 100644 index 0000000..a44fa9f --- /dev/null +++ b/set/src/set.rs @@ -0,0 +1,739 @@ +use crate::{local_prelude::*, util}; + +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[cfg_attr( + feature = "borsh", + derive(borsh::BorshDeserialize, borsh::BorshSerialize) +)] +#[cfg_attr( + feature = "miniserde", + derive(miniserde::Deserialize, miniserde::Serialize) +)] +pub struct BitSet { + pub(crate) bit_vec: BitVec, +} + +impl Clone for BitSet { + fn clone(&self) -> Self { + BitSet { + bit_vec: self.bit_vec.clone(), + } + } + + fn clone_from(&mut self, other: &Self) { + self.bit_vec.clone_from(&other.bit_vec); + } +} + +impl Default for BitSet { + #[inline] + fn default() -> Self { + BitSet { + bit_vec: Default::default(), + } + } +} + +impl FromIterator for BitSet { + fn from_iter>(iter: I) -> Self { + let mut ret = 
Self::default(); + ret.extend(iter); + ret + } +} + +impl Extend for BitSet { + #[inline] + fn extend>(&mut self, iter: I) { + for i in iter { + self.insert(i); + } + } +} + +impl PartialOrd for BitSet { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BitSet { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.iter().cmp(other) + } +} + +impl PartialEq for BitSet { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.iter().eq(other) + } +} + +impl Eq for BitSet {} + +impl BitSet { + /// Creates a new empty `BitSet`. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::new(); + /// ``` + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Creates a new `BitSet` with initially no contents, able to + /// hold `nbits` elements without resizing. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::with_capacity(100); + /// assert!(s.capacity() >= 100); + /// ``` + #[inline] + pub fn with_capacity(nbits: usize) -> Self { + let bit_vec = BitVec::from_elem(nbits, false); + Self::from_bit_vec(bit_vec) + } + + /// Creates a new `BitSet` from the given bit vector. + /// + /// # Examples + /// + /// ``` + /// use bit_vec::BitVec; + /// use bit_set::BitSet; + /// + /// let bv = BitVec::from_bytes(&[0b01100000]); + /// let s = BitSet::from_bit_vec(bv); + /// + /// // Print 1, 2 in arbitrary order + /// for x in s.iter() { + /// println!("{}", x); + /// } + /// ``` + #[inline] + pub fn from_bit_vec(bit_vec: BitVec) -> Self { + BitSet { bit_vec } + } + + pub fn from_bytes(bytes: &[u8]) -> Self { + BitSet { + bit_vec: BitVec::from_bytes(bytes), + } + } +} + +#[allow(clippy::multiple_inherent_impl)] +impl BitSet { + /// Creates a new empty `BitSet`. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = ::new_general(); + /// ``` + #[inline] + pub fn new_general() -> Self { + Self::default() + } + + /// Creates a new `BitSet` with initially no contents, able to + /// hold `nbits` elements without resizing. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = ::with_capacity_general(100); + /// assert!(s.capacity() >= 100); + /// ``` + #[inline] + pub fn with_capacity_general(nbits: usize) -> Self { + let bit_vec = BitVec::from_elem_general(nbits, false); + Self::from_bit_vec_general(bit_vec) + } + + /// Creates a new `BitSet` from the given bit vector. + /// + /// # Examples + /// + /// ``` + /// use bit_vec::BitVec; + /// use bit_set::BitSet; + /// + /// let bv: BitVec = BitVec::from_bytes_general(&[0b01100000]); + /// let s = BitSet::from_bit_vec_general(bv); + /// + /// // Print 1, 2 in arbitrary order + /// for x in s.iter() { + /// println!("{}", x); + /// } + /// ``` + #[inline] + pub fn from_bit_vec_general(bit_vec: BitVec) -> Self { + BitSet { bit_vec } + } + + pub fn from_bytes_general(bytes: &[u8]) -> Self { + BitSet { + bit_vec: BitVec::from_bytes_general(bytes), + } + } + + /// Returns the capacity in bits for this bit vector. Inserting any + /// element less than this amount will not trigger a resizing. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::with_capacity(100); + /// assert!(s.capacity() >= 100); + /// ``` + #[inline] + pub fn capacity(&self) -> usize { + self.bit_vec.capacity() + } + + /// Reserves capacity for the given `BitSet` to contain `len` distinct elements. In the case + /// of `BitSet` this means reallocations will not occur as long as all inserted elements + /// are less than `len`. + /// + /// The collection may reserve more space to avoid frequent reallocations. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::new(); + /// s.reserve_len(10); + /// assert!(s.capacity() >= 10); + /// ``` + pub fn reserve_len(&mut self, len: usize) { + let cur_len = self.bit_vec.len(); + if len >= cur_len { + self.bit_vec.reserve(len - cur_len); + } + } + + /// Reserves the minimum capacity for the given `BitSet` to contain `len` distinct elements. + /// In the case of `BitSet` this means reallocations will not occur as long as all inserted + /// elements are less than `len`. + /// + /// Note that the allocator may give the collection more space than it requests. Therefore + /// capacity can not be relied upon to be precisely minimal. Prefer `reserve_len` if future + /// insertions are expected. + /// + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::new(); + /// s.reserve_len_exact(10); + /// assert!(s.capacity() >= 10); + /// ``` + pub fn reserve_len_exact(&mut self, len: usize) { + let cur_len = self.bit_vec.len(); + if len >= cur_len { + self.bit_vec.reserve_exact(len - cur_len); + } + } + + /// Consumes this set to return the underlying bit vector. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::new(); + /// s.insert(0); + /// s.insert(3); + /// + /// let bv = s.into_bit_vec(); + /// assert!(bv[0]); + /// assert!(bv[3]); + /// ``` + #[inline] + pub fn into_bit_vec(self) -> BitVec { + self.bit_vec + } + + /// Returns a reference to the underlying bit vector. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut set = BitSet::new(); + /// set.insert(0); + /// + /// let bv = set.get_ref(); + /// assert_eq!(bv[0], true); + /// ``` + #[inline] + pub fn get_ref(&self) -> &BitVec { + &self.bit_vec + } + + /// Returns a mutable reference to the underlying bit vector. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut set = BitSet::new(); + /// set.insert(0); + /// set.insert(3); + /// + /// { + /// let bv = set.get_mut(); + /// bv.set(1, true); + /// } + /// + /// assert!(set.contains(0)); + /// assert!(set.contains(1)); + /// assert!(set.contains(3)); + /// ``` + #[inline] + pub fn get_mut(&mut self) -> &mut BitVec { + &mut self.bit_vec + } + + /// # Safety + /// + /// Safe and upholds invariant if function `f` does not alter most + /// significant bits of the first argument where respective bits + /// in the second argument are equal 0. + /// + /// In other words, this is safe if `f` is XOR, OR, AND, but violates + /// invariant if it is XNOR, NAND. + /// + /// See the safety section below. + #[inline] + fn other_op(&mut self, other: &Self, mut f: F) + where + F: FnMut(B, B) -> B, + { + // Unwrap BitVecs + let self_bit_vec = &mut self.bit_vec; + let other_bit_vec = &other.bit_vec; + + let self_len = self_bit_vec.len(); + let other_len = other_bit_vec.len(); + + // Expand the vector if necessary + if self_len < other_len { + self_bit_vec.grow(other_len - self_len, false); + } + + // virtually pad other with 0's for equal lengths + let other_words = util::match_words(self_bit_vec, other_bit_vec).1; + + debug_assert!(self_bit_vec.len() >= other_bit_vec.len()); + + // Apply values found in other + for (i, w) in other_words { + let old = self_bit_vec.storage()[i]; + let new = f(old, w); + // Safety: + // We do not change the underlying Vec's size, so this is always ok. + // - What do we do to uphold the invariant for trailing bits? + // - We have a debug assert below that guards us against polluting + // trailing bits. 
+ unsafe { + self_bit_vec.storage_mut()[i] = new; + } + if i == self_bit_vec.storage().len() - 1 && self_bit_vec.len() % B::bits() > 0 { + debug_assert!(new >> (self_bit_vec.len() % B::bits()) == B::zero()); + } + } + } + + /// Truncates the underlying vector to the least length required. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut s = BitSet::new(); + /// s.insert(3231); + /// s.remove(3231); + /// + /// // Internal storage will probably be bigger than necessary + /// println!("old capacity: {}", s.capacity()); + /// assert!(s.capacity() >= 3231); + /// + /// // Now should be smaller + /// s.shrink_to_fit(); + /// println!("new capacity: {}", s.capacity()); + /// ``` + #[inline] + pub fn shrink_to_fit(&mut self) { + let bit_vec = &mut self.bit_vec; + // Obtain original length + let old_len = bit_vec.storage().len(); + // Obtain coarse trailing zero length + let n = bit_vec + .storage() + .iter() + .rev() + .take_while(|&&n| n == B::zero()) + .count(); + // Truncate away all empty trailing blocks, then shrink_to_fit + let trunc_len = old_len - n; + // Safety: + // Those function calls may seem unsafe, but they are guaranteed + // not to introduce any memory unsafety. + // We set the correct length as a multiple of `B::bits()`, + // thus maintaining the trailing bit invariant. + unsafe { + bit_vec.storage_mut().truncate(trunc_len); + bit_vec.set_len(trunc_len * B::bits()); + } + bit_vec.shrink_to_fit(); + } + + /// Unions in-place with the specified other bit vector. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = 0b01101000; + /// let b = 0b10100000; + /// let res = 0b11101000; + /// + /// let mut a = BitSet::from_bytes(&[a]); + /// let b = BitSet::from_bytes(&[b]); + /// let res = BitSet::from_bytes(&[res]); + /// + /// a.union_with(&b); + /// assert_eq!(a, res); + /// ``` + #[inline] + pub fn union_with(&mut self, other: &Self) { + self.other_op(other, |w1, w2| w1 | w2); + } + + /// Intersects in-place with the specified other bit vector. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = 0b01101000; + /// let b = 0b10100000; + /// let res = 0b00100000; + /// + /// let mut a = BitSet::from_bytes(&[a]); + /// let b = BitSet::from_bytes(&[b]); + /// let res = BitSet::from_bytes(&[res]); + /// + /// a.intersect_with(&b); + /// assert_eq!(a, res); + /// ``` + #[inline] + pub fn intersect_with(&mut self, other: &Self) { + self.other_op(other, |w1, w2| w1 & w2); + } + + /// Makes this bit vector the difference with the specified other bit vector + /// in-place. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = 0b01101000; + /// let b = 0b10100000; + /// let a_b = 0b01001000; // a - b + /// let b_a = 0b10000000; // b - a + /// + /// let mut bva = BitSet::from_bytes(&[a]); + /// let bvb = BitSet::from_bytes(&[b]); + /// let bva_b = BitSet::from_bytes(&[a_b]); + /// let bvb_a = BitSet::from_bytes(&[b_a]); + /// + /// bva.difference_with(&bvb); + /// assert_eq!(bva, bva_b); + /// + /// let bva = BitSet::from_bytes(&[a]); + /// let mut bvb = BitSet::from_bytes(&[b]); + /// + /// bvb.difference_with(&bva); + /// assert_eq!(bvb, bvb_a); + /// ``` + #[inline] + pub fn difference_with(&mut self, other: &Self) { + self.other_op(other, |w1, w2| w1 & !w2); + } + + /// Makes this bit vector the symmetric difference with the specified other + /// bit vector in-place. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let a = 0b01101000; + /// let b = 0b10100000; + /// let res = 0b11001000; + /// + /// let mut a = BitSet::from_bytes(&[a]); + /// let b = BitSet::from_bytes(&[b]); + /// let res = BitSet::from_bytes(&[res]); + /// + /// a.symmetric_difference_with(&b); + /// assert_eq!(a, res); + /// ``` + #[inline] + pub fn symmetric_difference_with(&mut self, other: &Self) { + self.other_op(other, |w1, w2| w1 ^ w2); + } + + /* + /// Moves all elements from `other` into `Self`, leaving `other` empty. + /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut a = BitSet::new(); + /// a.insert(2); + /// a.insert(6); + /// + /// let mut b = BitSet::new(); + /// b.insert(1); + /// b.insert(3); + /// b.insert(6); + /// + /// a.append(&mut b); + /// + /// assert_eq!(a.len(), 4); + /// assert_eq!(b.len(), 0); + /// assert_eq!(a, BitSet::from_bytes(&[0b01110010])); + /// ``` + pub fn append(&mut self, other: &mut Self) { + self.union_with(other); + other.clear(); + } + + /// Splits the `BitSet` into two at the given key including the key. + /// Retains the first part in-place while returning the second part. 
+ /// + /// # Examples + /// + /// ``` + /// use bit_set::BitSet; + /// + /// let mut a = BitSet::new(); + /// a.insert(2); + /// a.insert(6); + /// a.insert(1); + /// a.insert(3); + /// + /// let b = a.split_off(3); + /// + /// assert_eq!(a.len(), 2); + /// assert_eq!(b.len(), 2); + /// assert_eq!(a, BitSet::from_bytes(&[0b01100000])); + /// assert_eq!(b, BitSet::from_bytes(&[0b00010010])); + /// ``` + pub fn split_off(&mut self, at: usize) -> Self { + let mut other = BitSet::new(); + + if at == 0 { + swap(self, &mut other); + return other; + } else if at >= self.bit_vec.len() { + return other; + } + + // Calculate block and bit at which to split + let w = at / BITS; + let b = at % BITS; + + // Pad `other` with `w` zero blocks, + // append `self`'s blocks in the range from `w` to the end to `other` + other.bit_vec.storage_mut().extend(repeat(0u32).take(w) + .chain(self.bit_vec.storage()[w..].iter().cloned())); + other.bit_vec.nbits = self.bit_vec.nbits; + + if b > 0 { + other.bit_vec.storage_mut()[w] &= !0 << b; + } + + // Sets `bit_vec.len()` and fixes the last block as well + self.bit_vec.truncate(at); + + other + } + */ + + /// Counts the number of set bits in this set. + /// + /// Note that this function scans the set to calculate the number. + #[inline] + pub fn count(&self) -> usize { + self.bit_vec.blocks().fold(0, |acc, n| acc + n.count_ones()) + } + + /// Counts the number of set bits in this set. + /// + /// Note that this function scans the set to calculate the number. + #[inline] + #[deprecated = "use BitVec::count() instead"] + pub fn len(&self) -> usize { + self.count() + } + + /// Returns whether there are no bits set in this set + #[inline] + pub fn is_empty(&self) -> bool { + self.bit_vec.none() + } + + /// Removes all elements of this set. + /// + /// Different from [`reset`] only in that the capacity is preserved. 
+ /// + /// [`reset`]: Self::reset + #[inline] + pub fn make_empty(&mut self) { + self.bit_vec.fill(false); + } + + /// Resets this set to an empty state. + /// + /// Different from [`make_empty`] only in that the capacity may NOT be preserved. + /// + /// [`make_empty`]: Self::make_empty + #[inline] + pub fn reset(&mut self) { + self.bit_vec.remove_all(); + } + + /// Clears all bits in this set + #[deprecated(since = "0.9.0", note = "please use `fn make_empty` instead")] + #[inline] + pub fn clear(&mut self) { + self.make_empty(); + } + + /// Returns `true` if this set contains the specified integer. + #[inline] + pub fn contains(&self, value: usize) -> bool { + let bit_vec = &self.bit_vec; + value < bit_vec.len() && bit_vec[value] + } + + /// Returns `true` if the set has no elements in common with `other`. + /// This is equivalent to checking for an empty intersection. + #[inline] + pub fn is_disjoint(&self, other: &Self) -> bool { + self.intersection(other).next().is_none() + } + + /// Returns `true` if the set is a subset of another. + #[inline] + pub fn is_subset(&self, other: &Self) -> bool { + let self_bit_vec = &self.bit_vec; + let other_bit_vec = &other.bit_vec; + let other_blocks = util::blocks_for_bits::(other_bit_vec.len()); + + // Check that `self` intersect `other` is self + self_bit_vec.blocks().zip(other_bit_vec.blocks()).all(|(w1, w2)| w1 & w2 == w1) && + // Make sure if `self` has any more blocks than `other`, they're all 0 + self_bit_vec.blocks().skip(other_blocks).all(|w| w == B::zero()) + } + + /// Returns `true` if the set is a superset of another. + #[inline] + pub fn is_superset(&self, other: &Self) -> bool { + other.is_subset(self) + } + + /// Adds a value to the set. Returns `true` if the value was not already + /// present in the set. 
+ pub fn insert(&mut self, value: usize) -> bool { + if self.contains(value) { + return false; + } + + // Ensure we have enough space to hold the new element + let len = self.bit_vec.len(); + if value >= len { + self.bit_vec.grow(value - len + 1, false); + } + + self.bit_vec.set(value, true); + true + } + + /// Removes a value from the set. Returns `true` if the value was + /// present in the set. + pub fn remove(&mut self, value: usize) -> bool { + if !self.contains(value) { + return false; + } + + self.bit_vec.set(value, false); + + true + } + + /// Excludes `element` and all greater elements from the `BitSet`. + pub fn truncate(&mut self, element: usize) { + self.bit_vec.truncate(element); + } +} + +impl fmt::Debug for BitSet { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("BitSet") + .field("bit_vec", &self.bit_vec) + .finish() + } +} + +impl fmt::Display for BitSet { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_set().entries(self).finish() + } +} + +impl hash::Hash for BitSet { + fn hash(&self, state: &mut H) { + for pos in self { + pos.hash(state); + } + } +} diff --git a/set/src/util.rs b/set/src/util.rs new file mode 100644 index 0000000..c024242 --- /dev/null +++ b/set/src/util.rs @@ -0,0 +1,54 @@ +use crate::local_prelude::*; + +#[allow(type_alias_bounds)] +type MatchWords<'a, B: BitBlock> = + Chain>, Skip>>>>; + +/// Computes how many blocks are needed to store that many bits +pub(crate) fn blocks_for_bits(bits: usize) -> usize { + // If we want 17 bits, dividing by 32 will produce 0. So we add 1 to make sure we + // reserve enough. But if we want exactly a multiple of 32, this will actually allocate + // one too many. So we need to check if that's the case. We can do that by computing if + // bitwise AND by `32 - 1` is 0. But LLVM should be able to optimize the semantically + // superior modulo operator on a power of two to this. 
+ // + // Note that we can technically avoid this branch with the expression + // `(bits + B::bits() - 1) / B::bits()`, but if `bits` is almost `usize::MAX` this will overflow. + if bits % B::bits() == 0 { + bits / B::bits() + } else { + bits / B::bits() + 1 + } +} + +#[allow(clippy::iter_skip_zero)] +// Take two BitVec's, and return iterators of their words, where the shorter one +// has been padded with 0's +pub(crate) fn match_words<'a, 'b, B: BitBlock>( + a: &'a BitVec, + b: &'b BitVec, +) -> (MatchWords<'a, B>, MatchWords<'b, B>) { + let a_len = a.storage().len(); + let b_len = b.storage().len(); + + // have to uselessly pretend to pad the longer one for type matching + if a_len < b_len { + ( + a.blocks() + .enumerate() + .chain(iter::repeat(B::zero()).enumerate().take(b_len).skip(a_len)), + b.blocks() + .enumerate() + .chain(iter::repeat(B::zero()).enumerate().take(0).skip(0)), + ) + } else { + ( + a.blocks() + .enumerate() + .chain(iter::repeat(B::zero()).enumerate().take(0).skip(0)), + b.blocks() + .enumerate() + .chain(iter::repeat(B::zero()).enumerate().take(a_len).skip(b_len)), + ) + } +} diff --git a/set/tests/serialization.rs b/set/tests/serialization.rs new file mode 100644 index 0000000..1ce7ab6 --- /dev/null +++ b/set/tests/serialization.rs @@ -0,0 +1,47 @@ +#[allow(unused_imports)] +use bit_set::BitSet; + +#[cfg(feature = "serde")] +#[test] +fn test_serialization() { + let bset: BitSet = BitSet::new(); + let serialized = serde_json::to_string(&bset).unwrap(); + let unserialized: BitSet = serde_json::from_str(&serialized).unwrap(); + assert_eq!(bset, unserialized); + + let elems: Vec = vec![11, 42, 100, 101]; + let bset: BitSet = elems.iter().map(|n| *n).collect(); + let serialized = serde_json::to_string(&bset).unwrap(); + let unserialized = serde_json::from_str(&serialized).unwrap(); + assert_eq!(bset, unserialized); +} + +#[cfg(feature = "miniserde")] +#[test] +fn test_miniserde_serialization() { + let bset: BitSet = BitSet::new(); + let serialized = 
miniserde::json::to_string(&bset); + let unserialized: BitSet = miniserde::json::from_str(&serialized[..]).unwrap(); + assert_eq!(bset, unserialized); + + let elems: Vec = vec![11, 42, 100, 101]; + let bset: BitSet = elems.iter().map(|n| *n).collect(); + let serialized = miniserde::json::to_string(&bset); + let unserialized = miniserde::json::from_str(&serialized[..]).unwrap(); + assert_eq!(bset, unserialized); +} + +#[cfg(feature = "borsh")] +#[test] +fn test_borsh_serialization() { + let bset: BitSet = BitSet::new(); + let serialized = borsh::to_vec(&bset).unwrap(); + let unserialized: BitSet = borsh::from_slice(&serialized[..]).unwrap(); + assert_eq!(bset, unserialized); + + let elems: Vec = vec![11, 42, 100, 101]; + let bset: BitSet = elems.iter().map(|n| *n).collect(); + let serialized = borsh::to_vec(&bset).unwrap(); + let unserialized = borsh::from_slice(&serialized[..]).unwrap(); + assert_eq!(bset, unserialized); +} diff --git a/set/tests/set.rs b/set/tests/set.rs new file mode 100644 index 0000000..fce9b38 --- /dev/null +++ b/set/tests/set.rs @@ -0,0 +1,577 @@ +#![allow(clippy::shadow_reuse)] +#![allow(clippy::shadow_same)] +#![allow(clippy::shadow_unrelated)] + +use bit_set::BitSet; +use bit_vec::BitVec; +use std::cmp::Ordering::{Equal, Greater, Less}; +use std::vec::Vec; +use std::{format, vec}; + +#[test] +fn test_bit_set_display() { + let mut s = BitSet::new(); + s.insert(1); + s.insert(10); + s.insert(50); + s.insert(2); + assert_eq!("{1, 2, 10, 50}", format!("{}", s)); +} + +#[test] +fn test_bit_set_debug() { + let mut s = BitSet::new(); + s.insert(1); + s.insert(10); + s.insert(50); + s.insert(2); + let expected = "BitSet { bit_vec: BitVec { storage: \ + \"01100000001000000000000000000000 \ + 0000000000000000001\", nbits: 51 } }"; + let actual = format!("{:?}", s); + assert_eq!(expected, actual); +} + +#[test] +fn test_bit_set_from_usizes() { + let usizes = vec![0, 2, 2, 3]; + let a: BitSet = usizes.into_iter().collect(); + let mut b = 
BitSet::new(); + b.insert(0); + b.insert(2); + b.insert(3); + assert_eq!(a, b); +} + +#[test] +fn test_bit_set_iterator() { + let usizes = vec![0, 2, 2, 3]; + let bit_vec: BitSet = usizes.into_iter().collect(); + + let idxs: Vec<_> = bit_vec.iter().collect(); + assert_eq!(idxs, [0, 2, 3]); + assert_eq!(bit_vec.iter().count(), 3); + + let long: BitSet = (0..10000).filter(|&n| n % 2 == 0).collect(); + let real: Vec<_> = (0..10000 / 2).map(|x| x * 2).collect(); + + let idxs: Vec<_> = long.iter().collect(); + assert_eq!(idxs, real); + assert_eq!(long.iter().count(), real.len()); +} + +#[test] +fn test_bit_set_frombit_vec_init() { + let bools = [true, false]; + let lengths = [10, 64, 100]; + for &b in &bools { + for &l in &lengths { + let bitset = BitSet::from_bit_vec(BitVec::from_elem(l, b)); + assert_eq!(bitset.contains(1), b); + assert_eq!(bitset.contains(l - 1), b); + assert!(!bitset.contains(l)); + } + } +} + +#[test] +fn test_bit_vec_masking() { + let b = BitVec::from_elem(140, true); + let mut bs = BitSet::from_bit_vec(b); + assert!(bs.contains(139)); + assert!(!bs.contains(140)); + assert!(bs.insert(150)); + assert!(!bs.contains(140)); + assert!(!bs.contains(149)); + assert!(bs.contains(150)); + assert!(!bs.contains(151)); +} + +#[test] +fn test_bit_set_basic() { + let mut b = BitSet::new(); + assert!(b.insert(3)); + assert!(!b.insert(3)); + assert!(b.contains(3)); + assert!(b.insert(4)); + assert!(!b.insert(4)); + assert!(b.contains(3)); + assert!(b.insert(400)); + assert!(!b.insert(400)); + assert!(b.contains(400)); + assert_eq!(b.count(), 3); +} + +#[test] +fn test_bit_set_intersection() { + let mut a = BitSet::new(); + let mut b = BitSet::new(); + + assert!(a.insert(11)); + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(77)); + assert!(a.insert(103)); + assert!(a.insert(5)); + + assert!(b.insert(2)); + assert!(b.insert(11)); + assert!(b.insert(77)); + assert!(b.insert(5)); + assert!(b.insert(3)); + + let expected = [3, 5, 11, 77]; + let 
actual: Vec<_> = a.intersection(&b).collect(); + assert_eq!(actual, expected); + assert_eq!(a.intersection(&b).count(), expected.len()); +} + +#[test] +fn test_bit_set_difference() { + let mut a = BitSet::new(); + let mut b = BitSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(200)); + assert!(a.insert(500)); + + assert!(b.insert(3)); + assert!(b.insert(200)); + + let expected = [1, 5, 500]; + let actual: Vec<_> = a.difference(&b).collect(); + assert_eq!(actual, expected); + assert_eq!(a.difference(&b).count(), expected.len()); +} + +#[test] +fn test_bit_set_symmetric_difference() { + let mut a = BitSet::new(); + let mut b = BitSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(3)); + assert!(b.insert(9)); + assert!(b.insert(14)); + assert!(b.insert(220)); + + let expected = [1, 5, 11, 14, 220]; + let actual: Vec<_> = a.symmetric_difference(&b).collect(); + assert_eq!(actual, expected); + assert_eq!(a.symmetric_difference(&b).count(), expected.len()); +} + +#[test] +fn test_bit_set_union() { + let mut a = BitSet::new(); + let mut b = BitSet::new(); + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + assert!(a.insert(160)); + assert!(a.insert(19)); + assert!(a.insert(24)); + assert!(a.insert(200)); + + assert!(b.insert(1)); + assert!(b.insert(5)); + assert!(b.insert(9)); + assert!(b.insert(13)); + assert!(b.insert(19)); + + let expected = [1, 3, 5, 9, 11, 13, 19, 24, 160, 200]; + let actual: Vec<_> = a.union(&b).collect(); + assert_eq!(actual, expected); + assert_eq!(a.union(&b).count(), expected.len()); +} + +#[test] +fn test_bit_set_subset() { + let mut set1 = BitSet::new(); + let mut set2 = BitSet::new(); + + assert!(set1.is_subset(&set2)); // {} {} + set2.insert(100); + assert!(set1.is_subset(&set2)); // {} { 1 } + set2.insert(200); + 
assert!(set1.is_subset(&set2)); // {} { 1, 2 } + set1.insert(200); + assert!(set1.is_subset(&set2)); // { 2 } { 1, 2 } + set1.insert(300); + assert!(!set1.is_subset(&set2)); // { 2, 3 } { 1, 2 } + set2.insert(300); + assert!(set1.is_subset(&set2)); // { 2, 3 } { 1, 2, 3 } + set2.insert(400); + assert!(set1.is_subset(&set2)); // { 2, 3 } { 1, 2, 3, 4 } + set2.remove(100); + assert!(set1.is_subset(&set2)); // { 2, 3 } { 2, 3, 4 } + set2.remove(300); + assert!(!set1.is_subset(&set2)); // { 2, 3 } { 2, 4 } + set1.remove(300); + assert!(set1.is_subset(&set2)); // { 2 } { 2, 4 } +} + +#[test] +fn test_bit_set_is_disjoint() { + let a = BitSet::from_bytes(&[0b10100010]); + let b = BitSet::from_bytes(&[0b01000000]); + let c = BitSet::new(); + let d = BitSet::from_bytes(&[0b00110000]); + + assert!(!a.is_disjoint(&d)); + assert!(!d.is_disjoint(&a)); + + assert!(a.is_disjoint(&b)); + assert!(a.is_disjoint(&c)); + assert!(b.is_disjoint(&a)); + assert!(b.is_disjoint(&c)); + assert!(c.is_disjoint(&a)); + assert!(c.is_disjoint(&b)); +} + +#[test] +fn test_bit_set_union_with() { + //a should grow to include larger elements + let mut a = BitSet::new(); + a.insert(0); + let mut b = BitSet::new(); + b.insert(5); + let expected = BitSet::from_bytes(&[0b10000100]); + a.union_with(&b); + assert_eq!(a, expected); + + // Standard + let mut a = BitSet::from_bytes(&[0b10100010]); + let mut b = BitSet::from_bytes(&[0b01100010]); + let c = a.clone(); + a.union_with(&b); + b.union_with(&c); + assert_eq!(a.count(), 4); + assert_eq!(b.count(), 4); +} + +#[test] +fn test_bit_set_intersect_with() { + // Explicitly 0'ed bits + let mut a = BitSet::from_bytes(&[0b10100010]); + let mut b = BitSet::from_bytes(&[0b00000000]); + let c = a.clone(); + a.intersect_with(&b); + b.intersect_with(&c); + assert!(a.is_empty()); + assert!(b.is_empty()); + + // Uninitialized bits should behave like 0's + let mut a = BitSet::from_bytes(&[0b10100010]); + let mut b = BitSet::new(); + let c = a.clone(); + 
a.intersect_with(&b); + b.intersect_with(&c); + assert!(a.is_empty()); + assert!(b.is_empty()); + + // Standard + let mut a = BitSet::from_bytes(&[0b10100010]); + let mut b = BitSet::from_bytes(&[0b01100010]); + let c = a.clone(); + a.intersect_with(&b); + b.intersect_with(&c); + assert_eq!(a.count(), 2); + assert_eq!(b.count(), 2); +} + +#[test] +fn test_bit_set_difference_with() { + // Explicitly 0'ed bits + let mut a = BitSet::from_bytes(&[0b00000000]); + let b = BitSet::from_bytes(&[0b10100010]); + a.difference_with(&b); + assert!(a.is_empty()); + + // Uninitialized bits should behave like 0's + let mut a = BitSet::new(); + let b = BitSet::from_bytes(&[0b11111111]); + a.difference_with(&b); + assert!(a.is_empty()); + + // Standard + let mut a = BitSet::from_bytes(&[0b10100010]); + let mut b = BitSet::from_bytes(&[0b01100010]); + let c = a.clone(); + a.difference_with(&b); + b.difference_with(&c); + assert_eq!(a.count(), 1); + assert_eq!(b.count(), 1); +} + +#[test] +fn test_bit_set_symmetric_difference_with() { + //a should grow to include larger elements + let mut a = BitSet::new(); + a.insert(0); + a.insert(1); + let mut b = BitSet::new(); + b.insert(1); + b.insert(5); + let expected = BitSet::from_bytes(&[0b10000100]); + a.symmetric_difference_with(&b); + assert_eq!(a, expected); + + let mut a = BitSet::from_bytes(&[0b10100010]); + let b = BitSet::new(); + let c = a.clone(); + a.symmetric_difference_with(&b); + assert_eq!(a, c); + + // Standard + let mut a = BitSet::from_bytes(&[0b11100010]); + let mut b = BitSet::from_bytes(&[0b01101010]); + let c = a.clone(); + a.symmetric_difference_with(&b); + b.symmetric_difference_with(&c); + assert_eq!(a.count(), 2); + assert_eq!(b.count(), 2); +} + +#[test] +fn test_bit_set_eq() { + let a = BitSet::from_bytes(&[0b10100010]); + let b = BitSet::from_bytes(&[0b00000000]); + let c = BitSet::new(); + + assert!(a == a); + assert!(a != b); + assert!(a != c); + assert!(b == b); + assert!(b == c); + assert!(c == c); +} + 
+#[test] +fn test_bit_set_cmp() { + let a = BitSet::from_bytes(&[0b10100010]); + let b = BitSet::from_bytes(&[0b00000000]); + let c = BitSet::new(); + + assert_eq!(a.cmp(&b), Greater); + assert_eq!(a.cmp(&c), Greater); + assert_eq!(b.cmp(&a), Less); + assert_eq!(b.cmp(&c), Equal); + assert_eq!(c.cmp(&a), Less); + assert_eq!(c.cmp(&b), Equal); +} + +#[test] +fn test_bit_set_shrink_to_fit_new() { + // There was a strange bug where we refused to truncate to 0 + // and this would end up actually growing the array in a way + // that (safely) corrupted the state. + let mut a = BitSet::new(); + assert_eq!(a.count(), 0); + assert_eq!(a.capacity(), 0); + a.shrink_to_fit(); + assert_eq!(a.count(), 0); + assert_eq!(a.capacity(), 0); + assert!(!a.contains(1)); + a.insert(3); + assert!(a.contains(3)); + assert_eq!(a.count(), 1); + assert!(a.capacity() > 0); + a.shrink_to_fit(); + assert!(a.contains(3)); + assert_eq!(a.count(), 1); + assert!(a.capacity() > 0); +} + +#[test] +fn test_bit_set_shrink_to_fit() { + let mut a = BitSet::new(); + assert_eq!(a.count(), 0); + assert_eq!(a.capacity(), 0); + a.insert(259); + a.insert(98); + a.insert(3); + assert_eq!(a.count(), 3); + assert!(a.capacity() > 0); + assert!(!a.contains(1)); + assert!(a.contains(259)); + assert!(a.contains(98)); + assert!(a.contains(3)); + + a.shrink_to_fit(); + assert!(!a.contains(1)); + assert!(a.contains(259)); + assert!(a.contains(98)); + assert!(a.contains(3)); + assert_eq!(a.count(), 3); + assert!(a.capacity() > 0); + + let old_cap = a.capacity(); + assert!(a.remove(259)); + a.shrink_to_fit(); + assert!(a.capacity() < old_cap, "{} {}", a.capacity(), old_cap); + assert!(!a.contains(1)); + assert!(!a.contains(259)); + assert!(a.contains(98)); + assert!(a.contains(3)); + assert_eq!(a.count(), 2); + + let old_cap2 = a.capacity(); + a.make_empty(); + assert_eq!(a.capacity(), old_cap2); + assert_eq!(a.count(), 0); + assert!(!a.contains(1)); + assert!(!a.contains(259)); + assert!(!a.contains(98)); + 
assert!(!a.contains(3)); + + a.insert(512); + assert!(a.capacity() > 0); + assert_eq!(a.count(), 1); + assert!(a.contains(512)); + assert!(!a.contains(1)); + assert!(!a.contains(259)); + assert!(!a.contains(98)); + assert!(!a.contains(3)); + + a.remove(512); + a.shrink_to_fit(); + assert_eq!(a.capacity(), 0); + assert_eq!(a.count(), 0); + assert!(!a.contains(512)); + assert!(!a.contains(1)); + assert!(!a.contains(259)); + assert!(!a.contains(98)); + assert!(!a.contains(3)); + assert!(!a.contains(0)); +} + +#[test] +fn test_bit_vec_remove() { + let mut a = BitSet::new(); + + assert!(a.insert(1)); + assert!(a.remove(1)); + + assert!(a.insert(100)); + assert!(a.remove(100)); + + assert!(a.insert(1000)); + assert!(a.remove(1000)); + a.shrink_to_fit(); +} + +#[test] +fn test_bit_vec_clone() { + let mut a = BitSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(100)); + assert!(a.insert(1000)); + + let mut b = a.clone(); + + assert!(a == b); + + assert!(b.remove(1)); + assert!(a.contains(1)); + + assert!(a.remove(1000)); + assert!(b.contains(1000)); +} + +#[test] +fn test_truncate() { + let bytes = [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]; + + let mut s = BitSet::from_bytes(&bytes); + s.truncate(5 * 8); + + assert_eq!(s, BitSet::from_bytes(&bytes[..5])); + assert_eq!(s.count(), 5 * 8); + s.truncate(4 * 8); + assert_eq!(s, BitSet::from_bytes(&bytes[..4])); + assert_eq!(s.count(), 4 * 8); + // Truncating to a size > s.len() should be a noop + s.truncate(5 * 8); + assert_eq!(s, BitSet::from_bytes(&bytes[..4])); + assert_eq!(s.count(), 4 * 8); + s.truncate(8); + assert_eq!(s, BitSet::from_bytes(&bytes[..1])); + assert_eq!(s.count(), 8); + s.truncate(0); + assert_eq!(s, BitSet::from_bytes(&[])); + assert_eq!(s.count(), 0); +} + +/* + #[test] + fn test_bit_set_append() { + let mut a = BitSet::new(); + a.insert(2); + a.insert(6); + + let mut b = BitSet::new(); + b.insert(1); + b.insert(3); + b.insert(6); + + a.append(&mut b); + + assert_eq!(a.len(), 4); + assert_eq!(b.len(), 
0); + assert!(b.capacity() >= 6); + + assert_eq!(a, BitSet::from_bytes(&[0b01110010])); + } + + #[test] + fn test_bit_set_split_off() { + // Split at 0 + let mut a = BitSet::from_bytes(&[0b10100000, 0b00010010, 0b10010010, + 0b00110011, 0b01101011, 0b10101101]); + + let b = a.split_off(0); + + assert_eq!(a.len(), 0); + assert_eq!(b.len(), 21); + + assert_eq!(b, BitSet::from_bytes(&[0b10100000, 0b00010010, 0b10010010, + 0b00110011, 0b01101011, 0b10101101]); + + // Split behind last element + let mut a = BitSet::from_bytes(&[0b10100000, 0b00010010, 0b10010010, + 0b00110011, 0b01101011, 0b10101101]); + + let b = a.split_off(50); + + assert_eq!(a.len(), 21); + assert_eq!(b.len(), 0); + + assert_eq!(a, BitSet::from_bytes(&[0b10100000, 0b00010010, 0b10010010, + 0b00110011, 0b01101011, 0b10101101])); + + // Split at arbitrary element + let mut a = BitSet::from_bytes(&[0b10100000, 0b00010010, 0b10010010, + 0b00110011, 0b01101011, 0b10101101]); + + let b = a.split_off(34); + + assert_eq!(a.len(), 12); + assert_eq!(b.len(), 9); + + assert_eq!(a, BitSet::from_bytes(&[0b10100000, 0b00010010, 0b10010010, + 0b00110011, 0b01000000])); + assert_eq!(b, BitSet::from_bytes(&[0, 0, 0, 0, + 0b00101011, 0b10101101])); + } +*/ diff --git a/vec/Cargo.toml b/vec/Cargo.toml new file mode 100644 index 0000000..121a026 --- /dev/null +++ b/vec/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "bit-vec" +version.workspace = true +rust-version.workspace = true +authors = ["Alexis Beingessner "] +license = "Apache-2.0 OR MIT" +description = "A vector of bits" +repository = "https://github.com/contain-rs/bit-vec" +homepage = "https://github.com/contain-rs/bit-vec" +documentation = "https://docs.rs/bit-vec/" +keywords = ["data-structures", "bitvec", "bitmask", "bitmap", "bit"] +readme = "README.md" +edition = "2021" + +[dependencies] +borsh = { version = "1.6.0", default-features = false, features = ["derive"], optional = true } +serde = { version = "1.0", default-features = false, features = 
["derive", "alloc"], optional = true } +miniserde = { version = "0.1", optional = true } + +[dev-dependencies] +serde_json = "1.0" +rand = "0.10" +rand_xorshift = "0.5" +generic-tests = "0.1" + +[features] +default = ["std"] +std = ["serde?/std", "borsh?/std"] +allocator_api = [] + +[package.metadata.docs.rs] +features = ["borsh", "serde", "miniserde"] diff --git a/vec/LICENSE-APACHE b/vec/LICENSE-APACHE new file mode 120000 index 0000000..965b606 --- /dev/null +++ b/vec/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/vec/LICENSE-MIT b/vec/LICENSE-MIT new file mode 120000 index 0000000..76219eb --- /dev/null +++ b/vec/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/vec/README.md b/vec/README.md new file mode 100644 index 0000000..73aa535 --- /dev/null +++ b/vec/README.md @@ -0,0 +1,136 @@ +
+

bit-vec

+

+ A compact vector of bits. +

+

+ +[![crates.io][crates.io shield]][crates.io link] +[![Documentation][docs.rs badge]][docs.rs link] +![Rust CI][github ci badge] +![MSRV][rustc 1.82+] +
+
+[![Dependency Status][deps.rs status]][deps.rs link] +[![Download Status][shields.io download count]][crates.io link] + +

+
+ +[crates.io shield]: https://img.shields.io/crates/v/bit-vec?label=latest +[crates.io link]: https://crates.io/crates/bit-vec +[docs.rs badge]: https://docs.rs/bit-vec/badge.svg?version=0.10.0 +[docs.rs link]: https://docs.rs/bit-vec/0.10.0/bit_vec/ +[github ci badge]: https://github.com/contain-rs/bit-vec/actions/workflows/rust.yml/badge.svg +[rustc 1.82+]: https://img.shields.io/badge/rustc-1.82%2B-blue.svg +[deps.rs status]: https://deps.rs/crate/bit-vec/0.10.0/status.svg +[deps.rs link]: https://deps.rs/crate/bit-vec/0.10.0 +[shields.io download count]: https://img.shields.io/crates/d/bit-vec.svg + +## Usage + +Add this to your Cargo.toml: + +```toml +[dependencies] +bit-vec = "0.10" +``` + +If you want [serde](https://github.com/serde-rs/serde) support, include the feature like this: + +```toml +[dependencies] +bit-vec = { version = "0.10", features = ["serde"] } +``` + +If you want to use bit-vec in a program that has `#![no_std]`, just drop default features: + +```toml +[dependencies] +bit-vec = { version = "0.10", default-features = false } +``` + +If you want to use serde with the alloc crate instead of std, use this: + +```toml +[dependencies] +bit-vec = { version = "0.10", default-features = false, features = ["serde"] } +``` + +If you want [borsh-rs](https://github.com/near/borsh-rs) support, include it like this: + +```toml +[dependencies] +bit-vec = { version = "0.10", features = ["borsh"] } +``` + +Another available serialization library can be enabled with the +[`miniserde`](https://github.com/dtolnay/miniserde) feature. + + + +### Description + +Dynamic collections implemented with compact bit vectors. + +### Examples + +This is a simple example of the [Sieve of Eratosthenes][sieve] +which calculates prime numbers up to a given limit. 
+ +[sieve]: http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes + +```rust +use bit_vec::BitVec; + +let max_prime = 10000; + +// Store the primes as a BitVec +let primes = { + // Assume all numbers are prime to begin, and then we + // cross off non-primes progressively + let mut bv = BitVec::from_elem(max_prime, true); + + // Neither 0 nor 1 are prime + bv.set(0, false); + bv.set(1, false); + + for i in 2.. 1 + (max_prime as f64).sqrt() as usize { + // if i is a prime + if bv[i] { + // Mark all multiples of i as non-prime (any multiples below i * i + // will have been marked as non-prime previously) + for j in i.. { + if i * j >= max_prime { + break; + } + bv.set(i * j, false) + } + } + } + bv +}; + +// Simple primality tests below our max bound +let print_primes = 20; +print!("The primes below {} are: ", print_primes); +for x in 0..print_primes { + if primes.get(x).unwrap_or(false) { + print!("{} ", x); + } +} +println!(); + +let num_primes = primes.iter().filter(|x| *x).count(); +println!("There are {} primes below {}", num_primes, max_prime); +assert_eq!(num_primes, 1_229); +``` + + + +## License + +Dual-licensed for compatibility with the Rust project. + +Licensed under the Apache License Version 2.0: http://www.apache.org/licenses/LICENSE-2.0, +or the MIT license: http://opensource.org/licenses/MIT, at your option. 
diff --git a/benches/bench.rs b/vec/benches/bench.rs similarity index 94% rename from benches/bench.rs rename to vec/benches/bench.rs index b78b934..a3cec1f 100644 --- a/benches/bench.rs +++ b/vec/benches/bench.rs @@ -18,7 +18,7 @@ extern crate rand_xorshift; extern crate test; use bit_vec::BitVec; -use rand::{Rng, RngCore, SeedableRng}; +use rand::{Rng, RngExt, SeedableRng}; use rand_xorshift::XorShiftRng; use test::{black_box, Bencher}; @@ -27,7 +27,7 @@ const BENCH_BITS: usize = 1 << 14; const U32_BITS: usize = 32; fn small_rng() -> XorShiftRng { - XorShiftRng::from_os_rng() + XorShiftRng::from_rng(&mut rand::rng()) } #[bench] @@ -117,6 +117,7 @@ fn bench_bit_get_unchecked_small(b: &mut Bencher) { let bit_vec = black_box(bit_vec); b.iter(|| { for _ in 0..100 { + // Safety: This is just a benchmark of an unsafe fn. unsafe { black_box(bit_vec.get_unchecked((r.next_u32() as usize) % size)); } @@ -135,9 +136,10 @@ fn bench_bit_get_unchecked_small_assume(b: &mut Bencher) { let bit_vec = black_box(bit_vec); b.iter(|| { for _ in 0..100 { + // Safety: This is just a benchmark with an unsafe fn call. 
unsafe { let idx = (r.next_u32() as usize) % size; - ::std::hint::assert_unchecked(!(idx >= bit_vec.len())); + ::std::hint::assert_unchecked(idx < bit_vec.len()); black_box(bit_vec.get(idx)); } } @@ -227,7 +229,7 @@ fn bench_from_elem(b: &mut Bencher) { } #[bench] -fn bench_erathostenes(b: &mut test::Bencher) { +fn bench_eratosthenes(b: &mut test::Bencher) { let mut primes = vec![]; b.iter(|| { primes.clear(); @@ -250,13 +252,13 @@ fn bench_erathostenes(b: &mut test::Bencher) { } #[bench] -fn bench_erathostenes_set_all(b: &mut test::Bencher) { +fn bench_eratosthenes_set_all(b: &mut test::Bencher) { let mut primes = vec![]; let mut sieve = BitVec::from_elem(1 << 16, true); b.iter(|| { primes.clear(); black_box(&mut sieve); - sieve.set_all(); + sieve.fill(true); black_box(&mut sieve); let mut i = 2; while i < sieve.len() { diff --git a/vec/docs/README.md b/vec/docs/README.md new file mode 100644 index 0000000..4108499 --- /dev/null +++ b/vec/docs/README.md @@ -0,0 +1,185 @@ +# stackoverflow question + +Rust: Mark the method as unsafe, or just add "unsafe" to its name? Dilemma with library API design. + +Hi. I am maintaining a dynamic array of bits just like C++'s `vector<bool>`. + +The thing in question is the method for getting access to the underlying `Vec`, which may let the caller mess up the dynamic array. It's not inherently memory-unsafe, but currently marked as such. My idea is to change this unsafe fn to be marked as safe, while adding a prefix `unsafe_` to its name. + +# code review request + +Maintainer of `bit-vec` here. It's a Rust library for lists of booleans. + +The library is used by a couple thousand people, so I'd appreciate a thorough review. It provides a compact dynamic array of bits, like `vector<bool>` in C++; you can fill all elements with a given value, or remove the element at a given index. That is basically it. 
But I had to deprecate `fn clear` by renaming it to `fn fill` because the name was inconsistent with other collections having `clear` truncate the list to zero elements: https://github.com/contain-rs/bit-vec/issues/16 + +You may see the code here: https://github.com/contain-rs/bit-vec/pull/134/changes https://github.com/contain-rs/bit-vec/pull/135/changes All of it except the tests is included below. + +```rust + /// Assigns all bits in this vector to the given boolean value. + /// + /// # Invariants + /// + /// - After a call to `.fill(true)`, the result of [`all`] is `true`. + /// - After a call to `.fill(false)`, the result of [`none`] is `true`. + /// + /// [`all`]: Self::all + /// [`none`]: Self::none + #[inline] + pub fn fill(&mut self, bit: bool) { + self.ensure_invariant(); + let block = if bit { !B::zero() } else { B::zero() }; + for w in &mut self.storage { + *w = block; + } + if bit { + self.fix_last_block(); + } + } + + /// Clears all bits in this vector. + #[inline] + #[deprecated(since = "0.9.0", note = "please use `.fill(false)` instead")] + pub fn clear(&mut self) { + self.ensure_invariant(); + for w in &mut self.storage { + *w = B::zero(); + } + } + + /// Remove a bit at index `at`, shifting all bits after by one. + /// + /// # Panics + /// Panics if `at` is out of bounds for `BitVec`'s length (that is, if `at >= BitVec::len()`) + /// + /// # Examples + ///``` + /// use bit_vec::BitVec; + /// + /// let mut b = BitVec::new(); + /// + /// b.push(true); + /// b.push(false); + /// b.push(false); + /// b.push(true); + /// assert!(!b.remove(1)); + /// + /// assert!(b.eq_vec(&[true, false, true])); + ///``` + /// + /// # Time complexity + /// Takes O([`len`]) time. All items after the removal index must be + /// shifted to the left. In the worst case, all elements are shifted when + /// the removal index is 0. 
+ /// + /// [`len`]: Self::len + pub fn remove(&mut self, at: usize) -> bool { + assert!( + at < self.nbits, + "removal index (is {at}) should be < len (is {nbits})", + nbits = self.nbits + ); + self.ensure_invariant(); + + self.nbits -= 1; + + let last_block_bits = self.nbits % B::bits(); + let block_at = at / B::bits(); // needed block + let bit_at = at % B::bits(); // index within the block + + let lsbits_mask = (B::one() << bit_at) - B::one(); + + let mut carry = B::zero(); + + for block_ref in self.storage[block_at + 1..].iter_mut().rev() { + let curr_carry = *block_ref & B::one(); + *block_ref = *block_ref >> 1 | (carry << (B::bits() - 1)); + carry = curr_carry; + } + + // Safety: thanks to the assert above. + let result = unsafe { self.get_unchecked(at) }; + + self.storage[block_at] = (self.storage[block_at] & lsbits_mask) + | ((self.storage[block_at] & (!lsbits_mask << 1)) >> 1) + | carry << (B::bits() - 1); + + if last_block_bits == 0 { + self.storage.pop(); + } + + result + } +``` + +```rust +pub struct BitVec { + /// Internal representation of the bit vector + storage: Vec, + /// The number of valid bits in the internal representation + nbits: usize, +} + +/// Abstracts over a pile of bits (basically unsigned primitives) +pub trait BitBlock: + Copy + + Add + + Sub + + Shl + + Shr + + Not + + BitAnd + + BitOr + + BitXor + + Rem + + Eq + + Ord + + hash::Hash +{ + /// How many bits it has + fn bits() -> usize; + /// How many bytes it has + #[inline] + fn bytes() -> usize { + Self::bits() / 8 + } + /// Convert a byte into this type (lowest-order bits set) + fn from_byte(byte: u8) -> Self; + /// Count the number of 1's in the bitwise repr + fn count_ones(self) -> usize; + /// Count the number of 0's in the bitwise repr + fn count_zeros(self) -> usize { + Self::bits() - self.count_ones() + } + /// Get `0` + fn zero() -> Self; + /// Get `1` + fn one() -> Self; +} + +macro_rules! 
bit_block_impl { + ($(($t: ident, $size: expr)),*) => ($( + impl BitBlock for $t { + #[inline] + fn bits() -> usize { $size } + #[inline] + fn from_byte(byte: u8) -> Self { $t::from(byte) } + #[inline] + fn count_ones(self) -> usize { self.count_ones() as usize } + #[inline] + fn count_zeros(self) -> usize { self.count_zeros() as usize } + #[inline] + fn one() -> Self { 1 } + #[inline] + fn zero() -> Self { 0 } + } + )*) +} + +bit_block_impl! { + (u8, 8), + (u16, 16), + (u32, 32), + (u64, 64), + (usize, core::mem::size_of::() * 8) +} +``` \ No newline at end of file diff --git a/src/lib.rs b/vec/src/lib.rs similarity index 97% rename from src/lib.rs rename to vec/src/lib.rs index 782fe91..1319e14 100644 --- a/src/lib.rs +++ b/vec/src/lib.rs @@ -102,19 +102,6 @@ use std::string::String; #[cfg(feature = "std")] use std::vec::Vec; -#[cfg(feature = "serde")] -extern crate serde; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; -#[cfg(feature = "borsh")] -extern crate borsh; -#[cfg(feature = "miniserde")] -extern crate miniserde; -#[cfg(feature = "nanoserde")] -extern crate nanoserde; -#[cfg(feature = "nanoserde")] -use nanoserde::{DeBin, DeJson, DeRon, SerBin, SerJson, SerRon}; - #[cfg(not(feature = "std"))] #[macro_use] extern crate alloc; @@ -125,16 +112,21 @@ use alloc::string::String; #[cfg(not(feature = "std"))] use alloc::vec::Vec; +#[cfg(feature = "borsh")] +extern crate borsh; +#[cfg(feature = "miniserde")] +extern crate miniserde; +#[cfg(feature = "serde")] +extern crate serde; + +mod util; + use core::cell::RefCell; -use core::cmp; use core::cmp::Ordering; -use core::fmt::{self, Write}; -use core::hash; -use core::iter::repeat; +use core::fmt::Write; use core::iter::FromIterator; -use core::mem; use core::ops::*; -use core::slice; +use core::{cmp, fmt, hash, iter, mem, slice}; type MutBlocks<'a, B> = slice::IterMut<'a, B>; @@ -203,20 +195,6 @@ bit_block_impl! 
{ (usize, core::mem::size_of::() * 8) } -fn reverse_bits(byte: u8) -> u8 { - let mut result = 0; - for i in 0..u8::bits() { - result |= ((byte >> i) & 1) << (u8::bits() - 1 - i); - } - result -} - -static TRUE: bool = true; -static FALSE: bool = false; - -#[cfg(feature = "nanoserde")] -type B = u32; - /// The bitvector type. /// /// # Examples @@ -244,7 +222,7 @@ type B = u32; /// println!("{:?}", bv); /// println!("total bits set to true: {}", bv.iter().filter(|x| *x).count()); /// ``` -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr( feature = "borsh", derive(borsh::BorshDeserialize, borsh::BorshSerialize) @@ -253,10 +231,6 @@ type B = u32; feature = "miniserde", derive(miniserde::Deserialize, miniserde::Serialize) )] -#[cfg_attr( - feature = "nanoserde", - derive(DeBin, DeJson, DeRon, SerBin, SerJson, SerRon) -)] pub struct BitVec { /// Internal representation of the bit vector storage: Vec, @@ -271,9 +245,9 @@ impl Index for BitVec { #[inline] fn index(&self, i: usize) -> &bool { if self.get(i).expect("index out of bounds") { - &TRUE + &util::TRUE } else { - &FALSE + &util::FALSE } } } @@ -468,7 +442,8 @@ impl BitVec { for i in 0..complete_words { let mut accumulator = B::zero(); for idx in 0..B::bytes() { - accumulator |= B::from_byte(reverse_bits(bytes[i * B::bytes() + idx])) << (idx * 8) + accumulator |= + B::from_byte(util::reverse_bits(bytes[i * B::bytes() + idx])) << (idx * 8) } bit_vec.storage.push(accumulator); } @@ -476,7 +451,7 @@ impl BitVec { if extra_bytes > 0 { let mut last_word = B::zero(); for (i, &byte) in bytes[complete_words * B::bytes()..].iter().enumerate() { - last_word |= B::from_byte(reverse_bits(byte)) << (i * 8); + last_word |= B::from_byte(util::reverse_bits(byte)) << (i * 8); } bit_vec.storage.push(last_word); } @@ -520,7 +495,7 @@ impl BitVec { let mut changed_bits = B::zero(); for (a, b) in self.blocks_mut().zip(other.blocks()) { 
let w = op(*a, b); - changed_bits = changed_bits | (*a ^ w); + changed_bits |= *a ^ w; *a = w; } changed_bits != B::zero() @@ -554,7 +529,8 @@ impl BitVec { /// /// # Safety /// - /// Can probably cause unsafety. Only really intended for `BitSet`. + /// Can break the structure's invariants despite not + /// giving real memory unsafety. Only really intended for `BitSet`. #[inline] pub unsafe fn storage_mut(&mut self) -> &mut Vec { &mut self.storage @@ -598,7 +574,7 @@ impl BitVec { /// to implement when unused bits are all set to 1s. fn fix_last_block_with_ones(&mut self) { if let Some((last_block, used_bits)) = self.last_block_mut_with_mask() { - *last_block = *last_block | !used_bits; + *last_block |= !used_bits; } } @@ -668,6 +644,9 @@ impl BitVec { /// use bit_vec::BitVec; /// /// let bv = BitVec::from_bytes(&[0b01100000]); + /// // Safety: + /// // We access the structure with in-bounds indices (those smaller + /// // than 32). /// unsafe { /// assert_eq!(bv.get_unchecked(0), false); /// assert_eq!(bv.get_unchecked(1), true); @@ -1271,7 +1250,7 @@ impl BitVec { for block in other.storage.drain(..) 
{ { let last = self.storage.last_mut().unwrap(); - *last = *last | (block << b); + *last |= block << b; } self.storage.push(block >> (B::bits() - b)); } @@ -1405,18 +1384,6 @@ impl BitVec { /// assert_eq!(bv.to_bytes(), [0b00100000, 0b10000000]); /// ``` pub fn to_bytes(&self) -> Vec { - static REVERSE_TABLE: [u8; 256] = { - let mut tbl = [0u8; 256]; - let mut i: u8 = 0; - loop { - tbl[i as usize] = i.reverse_bits(); - if i == 255 { - break; - } - i += 1; - } - tbl - }; self.ensure_invariant(); let len = self.nbits / 8 + if self.nbits % 8 == 0 { 0 } else { 1 }; @@ -1430,7 +1397,7 @@ impl BitVec { byte |= 1 << bit_idx; } } - result.push(REVERSE_TABLE[byte as usize]); + result.push(util::reverse_bits(byte)); } result @@ -1598,7 +1565,7 @@ impl BitVec { let mask = mask_for_bits::(self.nbits); if value { let block = &mut self.storage[num_cur_blocks - 1]; - *block = *block | !mask; + *block |= !mask; } else { // Extra bits are already zero by invariant. } @@ -1613,7 +1580,7 @@ impl BitVec { // Allocate new words, if needed if new_nblocks > self.storage.len() { let to_add = new_nblocks - self.storage.len(); - self.storage.extend(repeat(full_value).take(to_add)); + self.storage.extend(iter::repeat_n(full_value, to_add)); } // Adjust internal bit count @@ -1923,7 +1890,7 @@ impl BitVec { self.nbits += 1; - self.storage[block_at] = self.storage[block_at] | flag; // set the bit + self.storage[block_at] |= flag; // set the bit Ok(()) } @@ -3213,11 +3180,11 @@ mod tests { fn test_serialization() { let bit_vec: BitVec = BitVec::new(); let serialized = serde_json::to_string(&bit_vec).unwrap(); - let unserialized: BitVec = serde_json::from_str(&serialized).unwrap(); + let unserialized: BitVec = serde_json::from_str(&serialized[..]).unwrap(); assert_eq!(bit_vec, unserialized); - let bools = vec![true, false, true, true]; - let bit_vec: BitVec = bools.iter().map(|n| *n).collect(); + let bools = [true, false, true, true]; + let bit_vec: BitVec = bools.iter().copied().collect(); let 
serialized = serde_json::to_string(&bit_vec).unwrap(); let unserialized = serde_json::from_str(&serialized).unwrap(); assert_eq!(bit_vec, unserialized); @@ -3228,43 +3195,26 @@ mod tests { fn test_miniserde_serialization() { let bit_vec: BitVec = BitVec::new(); let serialized = miniserde::json::to_string(&bit_vec); - let unserialized: BitVec = miniserde::json::from_str(&serialized[..]).unwrap(); + let unserialized = miniserde::json::from_str(&serialized[..]).unwrap(); assert_eq!(bit_vec, unserialized); - let bools = vec![true, false, true, true]; - let bit_vec: BitVec = bools.iter().map(|n| *n).collect(); + let bools = [true, false, true, true]; + let bit_vec: BitVec = bools.iter().copied().collect(); let serialized = miniserde::json::to_string(&bit_vec); let unserialized = miniserde::json::from_str(&serialized[..]).unwrap(); assert_eq!(bit_vec, unserialized); } - #[cfg(feature = "nanoserde")] - #[test] - fn test_nanoserde_json_serialization() { - use nanoserde::{DeJson, SerJson}; - - let bit_vec: BitVec = BitVec::new(); - let serialized = bit_vec.serialize_json(); - let unserialized: BitVec = BitVec::deserialize_json(&serialized[..]).unwrap(); - assert_eq!(bit_vec, unserialized); - - let bools = vec![true, false, true, true]; - let bit_vec: BitVec = bools.iter().map(|n| *n).collect(); - let serialized = bit_vec.serialize_json(); - let unserialized = BitVec::deserialize_json(&serialized[..]).unwrap(); - assert_eq!(bit_vec, unserialized); - } - #[cfg(feature = "borsh")] #[test] fn test_borsh_serialization() { let bit_vec: BitVec = BitVec::new(); let serialized = borsh::to_vec(&bit_vec).unwrap(); - let unserialized: BitVec = borsh::from_slice(&serialized[..]).unwrap(); + let unserialized = borsh::from_slice(&serialized[..]).unwrap(); assert_eq!(bit_vec, unserialized); - let bools = vec![true, false, true, true]; - let bit_vec: BitVec = bools.iter().map(|n| *n).collect(); + let bools = [true, false, true, true]; + let bit_vec: BitVec = bools.iter().copied().collect(); 
let serialized = borsh::to_vec(&bit_vec).unwrap(); let unserialized = borsh::from_slice(&serialized[..]).unwrap(); assert_eq!(bit_vec, unserialized); diff --git a/vec/src/util.rs b/vec/src/util.rs new file mode 100644 index 0000000..c413bc8 --- /dev/null +++ b/vec/src/util.rs @@ -0,0 +1,19 @@ +pub static TRUE: bool = true; +pub static FALSE: bool = false; + +pub(crate) fn reverse_bits(byte: u8) -> u8 { + REVERSE_TABLE[byte as usize] +} + +static REVERSE_TABLE: [u8; 256] = { + let mut tbl = [0u8; 256]; + let mut i: u8 = 0; + loop { + tbl[i as usize] = i.reverse_bits(); + if i == 255 { + break; + } + i += 1; + } + tbl +};