From f928edba9e028fc77e042d025980a286ff48f308 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 27 May 2026 03:21:44 -0400 Subject: [PATCH 01/20] Return GRAPH_META_PATH definition to graph_folder.rs and out of each storage crate. --- db4-storage/src/lib.rs | 3 --- raphtory/src/serialise/graph_folder.rs | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index a348bb0c60..fccf4533e2 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -231,9 +231,6 @@ pub fn read_constant_graph_properties( Ok(Vec::new()) } -/// Matches `db4_disk_storage::meta_file::GRAPH_META_PATH` -pub const GRAPH_META_PATH: &str = ".meta"; - /// No-op shim for when we have db4-storage instead of db4-disk-storage pub fn refresh_disk_graph_metadata( _disk_graph_path: &Path, diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 1cf96c95ee..45a5d5cbfd 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -33,7 +33,7 @@ pub const DATA_PATH: &str = "data"; pub const DEFAULT_DATA_PATH: &str = "data0"; /// Metadata file that stores path to the graph folder and graph metadata. -pub use storage::GRAPH_META_PATH; +pub const GRAPH_META_PATH: &str = ".meta"; /// Directory that stores graph data. pub const GRAPH_PATH: &str = "graph"; From 4209b36bd8c4cd197d85bcdbbe5656a223aecd89 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 27 May 2026 03:47:24 -0400 Subject: [PATCH 02/20] Move graph_folder constants out of raphtory and into raphtory-api. 
--- raphtory-api/src/core/storage/graph_folder.rs | 18 +++++++++++ raphtory-api/src/core/storage/mod.rs | 1 + raphtory-graphql/src/paths.rs | 3 +- raphtory-graphql/src/test_support.rs | 7 ++--- raphtory/src/serialise/graph_folder.rs | 31 +++++-------------- raphtory/src/serialise/serialise.rs | 6 ++-- 6 files changed, 34 insertions(+), 32 deletions(-) create mode 100644 raphtory-api/src/core/storage/graph_folder.rs diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs new file mode 100644 index 0000000000..fd13d76d74 --- /dev/null +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -0,0 +1,18 @@ +//! const vars for file and directory names regarding exported graphs. + +/// Metadata file that stores path to the data folder. +pub const ROOT_META_PATH: &str = ".raph"; +/// Outer most directory containing all data. +pub const DATA_PATH: &str = "data"; +pub const DEFAULT_DATA_PATH: &str = "data0"; +/// Metadata file that stores path to the graph folder and graph metadata. +pub const GRAPH_META_PATH: &str = ".meta"; +/// Directory that stores graph data. +pub const GRAPH_PATH: &str = "graph"; +pub const DEFAULT_GRAPH_PATH: &str = "graph0"; +/// Directory that stores search indexes. +pub const INDEX_PATH: &str = "index"; +/// Directory that stores vector embeddings of the graph. +pub const VECTORS_PATH: &str = "vectors"; +/// Temporary metadata file for atomic replacement. 
+pub const DIRTY_PATH: &str = ".dirty"; diff --git a/raphtory-api/src/core/storage/mod.rs b/raphtory-api/src/core/storage/mod.rs index ad33155ba7..3da53c0eab 100644 --- a/raphtory-api/src/core/storage/mod.rs +++ b/raphtory-api/src/core/storage/mod.rs @@ -4,6 +4,7 @@ use std::hash::BuildHasherDefault; pub mod arc_str; pub mod dict_mapper; +pub mod graph_folder; pub mod locked_vec; pub mod sorted_vec_map; pub mod timeindex; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 5ed8c6d09d..693d8ae116 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -9,9 +9,10 @@ use raphtory::{ prelude::{AdditionOps, GraphViewOps}, serialise::{ metadata::GraphMetadata, GraphFolder, GraphPaths, RelativePath, StableDecode, - WriteableGraphFolder, ROOT_META_PATH, + WriteableGraphFolder, }, }; +use raphtory_api::core::storage::graph_folder::ROOT_META_PATH; use std::{ cmp::Ordering, fs, diff --git a/raphtory-graphql/src/test_support.rs b/raphtory-graphql/src/test_support.rs index 76479a5732..69797717fb 100644 --- a/raphtory-graphql/src/test_support.rs +++ b/raphtory-graphql/src/test_support.rs @@ -13,12 +13,9 @@ use crate::{ }; use async_graphql::dynamic::Schema; use dynamic_graphql::Request; -use raphtory::{ - db::api::{storage::storage::Config, view::MaterializedGraph}, - serialise::ROOT_META_PATH, -}; +use raphtory::db::api::{storage::storage::Config, view::MaterializedGraph}; +use raphtory_api::core::storage::graph_folder::ROOT_META_PATH; use std::{path::Path, sync::Arc}; -use tempfile::{tempdir, TempDir}; pub(crate) struct TestSetup { pub(crate) data: Data, diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 45a5d5cbfd..58b2353972 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -15,7 +15,13 @@ use crate::{ serialise::metadata::GraphMetadata, }; use itertools::Itertools; -use 
raphtory_api::core::input::input_node::parse_u64_strict; +use raphtory_api::core::{ + input::input_node::parse_u64_strict, + storage::graph_folder::{ + DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, GRAPH_PATH, INDEX_PATH, ROOT_META_PATH, + VECTORS_PATH, + }, +}; use serde::{Deserialize, Serialize}; use std::{ fs::{self, File}, @@ -25,29 +31,6 @@ use std::{ use walkdir::WalkDir; use zip::{write::FileOptions, ZipArchive, ZipWriter}; -/// Metadata file that stores path to the data folder. -pub const ROOT_META_PATH: &str = ".raph"; - -/// Outer most directory containing all data. -pub const DATA_PATH: &str = "data"; -pub const DEFAULT_DATA_PATH: &str = "data0"; - -/// Metadata file that stores path to the graph folder and graph metadata. -pub const GRAPH_META_PATH: &str = ".meta"; - -/// Directory that stores graph data. -pub const GRAPH_PATH: &str = "graph"; -pub const DEFAULT_GRAPH_PATH: &str = "graph0"; - -/// Directory that stores search indexes. -pub const INDEX_PATH: &str = "index"; - -/// Directory that stores vector embeddings of the graph. -pub const VECTORS_PATH: &str = "vectors"; - -/// Temporary metadata file for atomic replacement. 
-pub const DIRTY_PATH: &str = ".dirty"; - pub(crate) fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { relative_path .strip_prefix(prefix) // should have the prefix diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 2623623e08..6438c9d706 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -9,10 +9,12 @@ use crate::{ get_zip_graph_path, metadata::GraphMetadata, parquet::{ParquetDecoder, ParquetEncoder}, - GraphFolder, GraphPaths, Metadata, RelativePath, DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH, - GRAPH_META_PATH, ROOT_META_PATH, + GraphFolder, GraphPaths, Metadata, RelativePath, }, }; +use raphtory_api::core::storage::graph_folder::{ + DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH, GRAPH_META_PATH, ROOT_META_PATH, +}; use std::{ fs::File, io::{Cursor, Read, Seek, Write}, From 9d1ddfc1440a400a4e17a1c9ffa4bdbe668376a5 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 27 May 2026 04:12:30 -0400 Subject: [PATCH 03/20] Now that the pub constants are in raphtory-api, we can avoid the duplicate DIRTY_PATH in raphtory-graphql/src/data.rs --- raphtory-graphql/src/data.rs | 2 -- raphtory-graphql/src/paths.rs | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 23e157f3f1..5a2b731f22 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -48,8 +48,6 @@ use std::{ use tracing::{error, warn}; use walkdir::WalkDir; -pub const DIRTY_PATH: &'static str = ".dirty"; - #[derive(thiserror::Error, Debug)] pub enum MutationErrorInner { #[error(transparent)] diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 693d8ae116..ecab36e81c 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,4 +1,4 @@ -use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; +use crate::{model::blocking_io, 
rayon::blocking_compute}; use futures_util::io; use raphtory::{ db::api::{ @@ -12,7 +12,7 @@ use raphtory::{ WriteableGraphFolder, }, }; -use raphtory_api::core::storage::graph_folder::ROOT_META_PATH; +use raphtory_api::core::storage::graph_folder::{DIRTY_PATH, ROOT_META_PATH}; use std::{ cmp::Ordering, fs, From c62e445e9639db6f0f1438135c93b6b3714c0cfd Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 28 May 2026 02:13:50 -0400 Subject: [PATCH 04/20] Fix import --- raphtory-graphql/src/test_support.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/raphtory-graphql/src/test_support.rs b/raphtory-graphql/src/test_support.rs index 69797717fb..d8573f21db 100644 --- a/raphtory-graphql/src/test_support.rs +++ b/raphtory-graphql/src/test_support.rs @@ -5,16 +5,13 @@ #![allow(dead_code)] use crate::{ - auth::Access, - auth_policy::AuthorizationPolicy, - config::app_config::AppConfig, - data::{Data, DIRTY_PATH}, + auth::Access, auth_policy::AuthorizationPolicy, config::app_config::AppConfig, data::Data, model::App, }; use async_graphql::dynamic::Schema; use dynamic_graphql::Request; use raphtory::db::api::{storage::storage::Config, view::MaterializedGraph}; -use raphtory_api::core::storage::graph_folder::ROOT_META_PATH; +use raphtory_api::core::storage::graph_folder::{DIRTY_PATH, ROOT_META_PATH}; use std::{path::Path, sync::Arc}; pub(crate) struct TestSetup { From 2cab13ca35d256d4c500030d4c69e4ff74b2a6d2 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 12 Jun 2026 04:34:23 -0400 Subject: [PATCH 05/20] Instead of moving constants only, move the whole graph_folder.rs file over. 
Now, we have many import errors because things from raphtory can't be imported (like GraphError) --- raphtory-api/Cargo.toml | 2 + raphtory-api/src/core/storage/graph_folder.rs | 868 ++++++++++++++++- raphtory/src/serialise/graph_folder.rs | 870 ------------------ raphtory/src/serialise/mod.rs | 2 - 4 files changed, 869 insertions(+), 873 deletions(-) delete mode 100644 raphtory/src/serialise/graph_folder.rs diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 14818af8fa..2062afeab4 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -45,6 +45,8 @@ iter-enum = { workspace = true } minijinja = { workspace = true, optional = true } display-error-chain = { workspace = true, optional = true } indexmap.workspace = true +walkdir.workspace = true +zip.workspace = true [dev-dependencies] proptest.workspace = true diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index fd13d76d74..d02b775220 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -1,18 +1,884 @@ -//! const vars for file and directory names regarding exported graphs. +//! Raphtory container format for managing graph data. +//! +//! Folder structure: +//! +//! GraphFolder +//! ├── .raph # Metadata file (json: {path: "data{id}"}) pointing at the current data folder +//! └── data{id}/ # Data folder (incremental id for atomic replacement) +//! ├── .meta # Metadata file (json: {path: "graph{id}", meta: {}}) pointing at the current graph folder +//! ├── graph{id}/ # Graph data (incremental id for atomic replacement) +//! ├── index/ # Search indexes (optional) +//! 
└── vectors/ # Vector embeddings (optional) + +use crate::core::input::input_node::parse_u64_strict; +use itertools::Itertools; +use raphtory::{ + db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, + serialise::metadata::GraphMetadata, +}; +use serde::{Deserialize, Serialize}; +use std::{ + fs::{self, File}, + io::{self, ErrorKind, Read, Seek, Write}, + path::{Path, PathBuf}, +}; +use walkdir::WalkDir; +use zip::{write::FileOptions, ZipArchive, ZipWriter}; /// Metadata file that stores path to the data folder. pub const ROOT_META_PATH: &str = ".raph"; + /// Outer most directory containing all data. pub const DATA_PATH: &str = "data"; pub const DEFAULT_DATA_PATH: &str = "data0"; + /// Metadata file that stores path to the graph folder and graph metadata. pub const GRAPH_META_PATH: &str = ".meta"; + /// Directory that stores graph data. pub const GRAPH_PATH: &str = "graph"; pub const DEFAULT_GRAPH_PATH: &str = "graph0"; + /// Directory that stores search indexes. pub const INDEX_PATH: &str = "index"; + /// Directory that stores vector embeddings of the graph. pub const VECTORS_PATH: &str = "vectors"; + /// Temporary metadata file for atomic replacement. 
pub const DIRTY_PATH: &str = ".dirty"; + +pub fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { + relative_path + .strip_prefix(prefix) // should have the prefix + .and_then(parse_u64_strict) // the remainder should be the id + .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; + Ok(()) +} + +fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { + let mut value = String::new(); + file.read_to_string(&mut value)?; + let path: RelativePath = serde_json::from_str(&value)?; + valid_path_pointer(&path.path, prefix)?; + Ok(path.path) +} + +pub fn read_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result, GraphError> { + let file = match File::open(base_path.join(file_name)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error.into()), + } + } + }; + let path = read_path_from_file(file, prefix)?; + Ok(Some(path)) +} + +pub fn make_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result { + let mut id = read_path_pointer(base_path, file_name, prefix)? + .and_then(|path| { + path.strip_prefix(prefix) + .and_then(|id| id.parse::().ok()) + }) + .map_or(0, |id| id + 1); + + let mut path = format!("{prefix}{id}"); + while base_path.join(&path).exists() { + id += 1; + path = format!("{prefix}{id}"); + } + Ok(path) +} + +pub fn read_or_default_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result { + Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) +} + +pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { + let file = zip.by_name(ROOT_META_PATH)?; + Ok(read_path_from_file(file, DATA_PATH)?) 
+} + +pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + let graph_path = get_zip_graph_path_name(zip, path.clone())?; + path.push('/'); + path.push_str(&graph_path); + Ok(path) +} + +pub fn get_zip_graph_path_name( + zip: &mut ZipArchive, + mut data_path: String, +) -> Result { + data_path.push('/'); + data_path.push_str(GRAPH_META_PATH); + let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; + Ok(graph_path) +} + +pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + path.push('/'); + path.push_str(GRAPH_META_PATH); + Ok(path) +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct RelativePath { + pub path: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + pub path: String, + pub meta: GraphMetadata, +} + +pub trait GraphPaths { + fn root(&self) -> &Path; + + fn root_meta_path(&self) -> PathBuf { + self.root().join(ROOT_META_PATH) + } + + fn data_path(&self) -> Result { + Ok(InnerGraphFolder { + path: self.root().join(self.relative_data_path()?), + }) + } + + fn vectors_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(VECTORS_PATH); + Ok(path) + } + + fn index_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(INDEX_PATH); + Ok(path) + } + + fn graph_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(self.relative_graph_path()?); + Ok(path) + } + + fn meta_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(GRAPH_META_PATH); + Ok(path) + } + + fn is_zip(&self) -> bool { + self.root().is_file() + } + + fn read_zip(&self) -> Result, GraphError> { + if self.is_zip() { + let file = File::open(self.root())?; + let archive = ZipArchive::new(file)?; + Ok(archive) + } else { + Err(GraphError::NotAZip) + } + } + + fn relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = 
self.read_zip()?; + get_zip_data_path(&mut zip)? + } else { + read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)? + }; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.data_path()?; + read_or_default_path_pointer(data_path.as_ref(), GRAPH_META_PATH, GRAPH_PATH) + } + } + + fn read_metadata(&self) -> Result { + let mut json = String::new(); + if self.is_zip() { + let mut zip = self.read_zip()?; + let path = get_zip_meta_path(&mut zip)?; + let mut zip_file = zip.by_name(&path)?; + zip_file.read_to_string(&mut json)?; + } else { + let mut file = File::open(self.meta_path()?)?; + file.read_to_string(&mut json)?; + } + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let tmp_path = self.data_path()?.path.join(".tmp"); + let tmp_file = File::create(&tmp_path)?; + serde_json::to_writer(tmp_file, &meta)?; + let path = self.meta_path()?; + fs::rename(tmp_path, path)?; + Ok(()) + } + + /// Returns true if folder is occupied by a graph. + fn is_reserved(&self) -> bool { + self.meta_path().map_or(false, |path| path.exists()) + } + + /// Initialise the data folder and metadata pointer + fn init(&self) -> Result<(), GraphError> { + if self.root().is_dir() { + let non_empty = self.root().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.root().into())); + } + } else { + fs::create_dir_all(self.root())? + } + + // Create the data folder and have the root metadata file point to it. 
+ let data_path = self.relative_data_path()?; + fs::create_dir(self.root().join(&data_path))?; + fs::write( + self.root_meta_path(), + serde_json::to_string(&RelativePath { path: data_path })?, + )?; + + // Create the graph folder inside the data folder. + let graph_path = self.graph_path()?; + fs::create_dir(&graph_path)?; + + Ok(()) + } +} + +impl + ?Sized> GraphPaths for P { + fn root(&self) -> &Path { + self.as_ref() + } +} + +#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] +pub struct GraphFolder { + root_folder: PathBuf, + pub write_as_zip_format: bool, +} + +impl GraphPaths for GraphFolder { + fn root(&self) -> &Path { + &self.root_folder + } +} + +impl GraphFolder { + pub fn new_as_zip(path: impl AsRef) -> Self { + let folder: GraphFolder = path.into(); + Self { + write_as_zip_format: true, + ..folder + } + } + + /// Reserve a folder, marking it as occupied by a graph. + /// Returns an error if the folder has data. + pub fn init_write(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); + } + let relative_data_path = self.relative_data_path()?; + let meta = serde_json::to_string(&RelativePath { + path: relative_data_path.clone(), + })?; + self.ensure_clean_root_dir()?; + let metapath = self.root_folder.join(DIRTY_PATH); + let mut path_file = File::create_new(&metapath)?; + path_file.write_all(meta.as_bytes())?; + fs::create_dir_all(self.root_folder.join(relative_data_path))?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } + + /// Prepare a graph folder for atomically swapping the data contents. + /// This returns an error if the folder is set to write as Zip. + /// + /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and + /// the contents of the corresponding folder are deleted. 
+ pub fn init_swap(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); + } + let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { + Ok(path) => path, + Err(_) => { + fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up + None + } + }; + + fs::create_dir_all(self.root())?; + + let swap_path = match old_swap { + Some(relative_path) => { + let swap_path = self.root_folder.join(relative_path); + if swap_path.exists() { + fs::remove_dir_all(&swap_path)?; + } + swap_path + } + None => { + let new_relative_data_path = + make_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + let new_data_path = self.root_folder.join(&new_relative_data_path); + let meta = serde_json::to_string(&RelativePath { + path: new_relative_data_path, + })?; + let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; + dirty_file.write_all(meta.as_bytes())?; + dirty_file.sync_all()?; + new_data_path + } + }; + fs::create_dir_all(swap_path)?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } + + /// Clears the folder of any contents. 
+ pub fn clear(&self) -> Result<(), GraphError> { + if self.is_zip() { + return Err(GraphError::IOErrorMsg( + "Cannot clear a zip folder".to_string(), + )); + } + + fs::remove_dir_all(&self.root_folder)?; + fs::create_dir_all(&self.root_folder)?; + Ok(()) + } + + pub fn get_zip_graph_prefix(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) + } else { + let data_path = read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + let graph_path = read_or_default_path_pointer( + &self.root().join(&data_path), + GRAPH_META_PATH, + GRAPH_PATH, + )?; + Ok([data_path, graph_path].join("/")) + } + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + if self.root_folder.exists() { + let non_empty = self.root_folder.read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); + } + } else { + fs::create_dir(&self.root_folder)? + } + + Ok(()) + } + + pub fn is_disk_graph(&self) -> Result { + let meta = self.read_metadata()?; + Ok(meta.is_diskgraph) + } + + /// Creates a zip file from the folder. 
+ pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { + if self.is_zip() { + let mut reader = File::open(&self.root_folder)?; + io::copy(&mut reader, &mut writer)?; + } else { + let mut zip = ZipWriter::new(writer); + for entry in WalkDir::new(&self.root_folder) + .into_iter() + .filter_map(Result::ok) + { + let path = entry.path(); + let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { + GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) + })?; + + let zip_entry_name = rel_path + .components() + .map(|name| name.as_os_str().to_string_lossy()) + .join("/"); + + if path.is_file() { + zip.start_file::<_, ()>(zip_entry_name, FileOptions::default())?; + + let mut file = File::open(path)?; + std::io::copy(&mut file, &mut zip)?; + } else if path.is_dir() && !zip_entry_name.is_empty() { + // Add empty directories to the zip + zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; + } + } + + zip.finish()?; + } + Ok(()) + } + + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + self.ensure_clean_root_dir()?; + let mut archive = ZipArchive::new(reader)?; + archive.extract(self.root())?; + Ok(()) + } +} + +#[must_use] +#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +pub struct WriteableGraphFolder { + path: PathBuf, +} + +impl GraphPaths for WriteableGraphFolder { + fn root(&self) -> &Path { + &self.path + } + + fn relative_data_path(&self) -> Result { + let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? + .ok_or(GraphError::NoWriteInProgress)?; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + let path = + read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; + Ok(path) + } + + fn init(&self) -> Result<(), GraphError> { + Ok(()) + } +} + +impl WriteableGraphFolder { + /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' + /// and cleaning up any old data if it exists. 
+ /// + /// This operation returns an error if there is no write in progress. + pub fn finish(self) -> Result { + let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + fs::rename( + self.root().join(DIRTY_PATH), + self.root().join(ROOT_META_PATH), + )?; + if let Some(old_data) = old_data { + let old_data_path = self.root().join(old_data); + if old_data_path.is_dir() { + fs::remove_dir_all(old_data_path)?; + } + } + Ok(GraphFolder { + root_folder: self.path, + write_as_zip_format: false, + }) + } +} + +#[derive(Clone, Debug)] +pub struct InnerGraphFolder { + path: PathBuf, +} + +impl AsRef for InnerGraphFolder { + fn as_ref(&self) -> &Path { + &self.path + } +} + +impl InnerGraphFolder { + pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let path = self.meta_path(); + let file = File::create(&path)?; + Ok(serde_json::to_writer(file, &meta)?) 
+ } + + pub fn read_metadata(&self) -> Result { + let mut json = String::new(); + let mut file = File::open(self.meta_path())?; + file.read_to_string(&mut json)?; + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + pub fn replace_graph( + &self, + graph: impl ParquetEncoder + GraphView + std::fmt::Debug, + ) -> Result<(), GraphError> { + let data_path = self.as_ref(); + let old_relative_graph_path = self.relative_graph_path()?; + let old_graph_path = self.path.join(&old_relative_graph_path); + let meta = GraphMetadata::from_graph(&graph); + let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; + graph.encode_parquet(data_path.join(&new_relative_graph_path))?; + + let dirty_path = data_path.join(DIRTY_PATH); + fs::write( + &dirty_path, + &serde_json::to_vec(&Metadata { + path: new_relative_graph_path.clone(), + meta, + })?, + )?; + fs::rename(&dirty_path, data_path.join(GRAPH_META_PATH))?; + if new_relative_graph_path != old_relative_graph_path { + fs::remove_dir_all(old_graph_path)?; + } + Ok(()) + } + pub fn vectors_path(&self) -> PathBuf { + self.path.join(VECTORS_PATH) + } + + pub fn index_path(&self) -> PathBuf { + self.path.join(INDEX_PATH) + } + + pub fn meta_path(&self) -> PathBuf { + self.path.join(GRAPH_META_PATH) + } + + pub fn relative_graph_path(&self) -> Result { + let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; + Ok(relative) + } + + pub fn graph_path(&self) -> Result { + Ok(self.path.join(self.relative_graph_path()?)) + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + if self.as_ref().exists() { + let non_empty = self.as_ref().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); + } + } else { + fs::create_dir_all(self)? + } + Ok(()) + } + + /// Extracts a zip file to the folder. 
+ pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + self.ensure_clean_root_dir()?; + + let mut zip = ZipArchive::new(reader)?; + let data_dir = get_zip_data_path(&mut zip)?; + + for i in 0..zip.len() { + let mut file = zip.by_index(i)?; + let zip_entry_name = match file.enclosed_name() { + Some(name) => name, + None => continue, + }; + if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { + let out_path = self.as_ref().join(inner_path); + if file.is_dir() { + std::fs::create_dir_all(&out_path)?; + } else { + // Create any parent directories + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } + + let mut out_file = std::fs::File::create(&out_path)?; + std::io::copy(&mut file, &mut out_file)?; + } + } + } + + Ok(()) + } +} + +impl> From

for GraphFolder { + fn from(value: P) -> Self { + let path: &Path = value.as_ref(); + Self { + root_folder: path.to_path_buf(), + write_as_zip_format: false, + } + } +} + +impl From<&GraphFolder> for GraphFolder { + fn from(value: &GraphFolder) -> Self { + value.clone() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // /// Verify that the metadata is re-created if it does not exist. + // #[test] + // #[ignore = "Need to think about how to deal with reading old format"] + // fn test_read_metadata_from_noninitialized_zip() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let tmp_dir = tempfile::TempDir::new().unwrap(); + // let zip_path = tmp_dir.path().join("graph.zip"); + // let folder = GraphFolder::new_as_zip(&zip_path); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file from the zip to simulate a noninitialized zip + // remove_metadata_from_zip(&zip_path); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } + + // /// Helper function to remove the metadata file from a zip + // fn remove_metadata_from_zip(zip_path: &Path) { + // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); + // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); + // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); + // + // // Scope for the zip writer + // { + // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); + // + // for i in 0..zip_archive.len() { + // let mut file = zip_archive.by_index(i).unwrap(); + // + // // Copy all files except the metadata file + 
// if file.name() != META_PATH { + // zip_writer + // .start_file::<_, ()>(file.name(), FileOptions::default()) + // .unwrap(); + // std::io::copy(&mut file, &mut zip_writer).unwrap(); + // } + // } + // + // zip_writer.finish().unwrap(); + // } + // + // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); + // } + + // /// Verify that the metadata is re-created if it does not exist. + // #[test] + // #[ignore = "Need to think about how to handle reading from old format"] + // fn test_read_metadata_from_noninitialized_folder() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let temp_folder = tempfile::TempDir::new().unwrap(); + // let folder = GraphFolder::from(temp_folder.path()); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file + // std::fs::remove_file(folder.get_meta_path()).unwrap(); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } + #[test] + fn test_zip_from_folder() { + let graph = Graph::new(); + graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); + graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); + graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); + + // Create a regular folder and encode the graph + let temp_folder = tempfile::TempDir::new().unwrap(); + let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); + graph.encode(&initial_folder).unwrap(); + + assert!(initial_folder.graph_path().unwrap().exists()); + assert!(initial_folder.meta_path().unwrap().exists()); + + // Create a zip file from the folder + let output_zip_path = 
temp_folder.path().join("output.zip"); + let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); + initial_folder.zip_from_folder(output_zip_file).unwrap(); + + assert!(output_zip_path.exists()); + + // Verify the output zip contains the same graph + let zip_folder = GraphFolder::new_as_zip(&output_zip_path); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); + + assert_graph_equal(&graph, &decoded_graph); + } + + #[test] + fn test_zip_from_zip() { + let graph = Graph::new(); + graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); + graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); + graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); + + // Create an initial zip file + let temp_folder = tempfile::TempDir::new().unwrap(); + let initial_zip_path = temp_folder.path().join("initial.zip"); + let initial_folder = GraphFolder::new_as_zip(&initial_zip_path); + graph.encode(&initial_folder).unwrap(); + + assert!(initial_zip_path.exists()); + + // Create a new zip file from the existing zip + let output_zip_path = temp_folder.path().join("output.zip"); + let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); + initial_folder.zip_from_folder(output_zip_file).unwrap(); + + assert!(output_zip_path.exists()); + + // Verify zip file sizes + let initial_size = std::fs::metadata(&initial_zip_path).unwrap().len(); + let output_size = std::fs::metadata(&output_zip_path).unwrap().len(); + assert_eq!(initial_size, output_size); + + // Verify the output zip contains the same graph + let zip_folder = GraphFolder::new_as_zip(&output_zip_path); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); + + assert_graph_equal(&graph, &decoded_graph); + } + + #[test] + fn test_unzip_to_folder() { + let graph = Graph::new(); + + graph + .add_edge(0, 0, 1, [("test prop 1", Prop::map(NO_PROPS))], None) + .unwrap(); + graph + .add_edge( + 1, + 2, + 3, + [("test prop 1", Prop::map([("key", "value")]))], + Some("layer_a"), + ) + .unwrap(); + graph + 
.add_edge(2, 3, 4, [("test prop 2", "value")], Some("layer_b")) + .unwrap(); + graph + .add_edge(3, 1, 4, [("test prop 3", 10.0)], None) + .unwrap(); + graph + .add_edge(4, 1, 3, [("test prop 4", true)], None) + .unwrap(); + + graph + .node(1) + .unwrap() + .add_updates(5, [("test node prop", 5i32)], None) + .unwrap(); + + let temp_folder = tempfile::TempDir::new().unwrap(); + let folder = temp_folder.path().join("graph"); + let graph_folder = GraphFolder::from(&folder); + + graph.encode(&graph_folder).unwrap(); + assert!(graph_folder.graph_path().unwrap().exists()); + + // Zip the folder + let mut zip_bytes = Vec::new(); + let cursor = std::io::Cursor::new(&mut zip_bytes); + graph_folder.zip_from_folder(cursor).unwrap(); + + // Unzip to a new folder + let folder = temp_folder.path().join("unzip"); + let unzip_folder = GraphFolder::from(&folder); + let cursor = std::io::Cursor::new(&zip_bytes); + unzip_folder.unzip_to_folder(cursor).unwrap(); + + // Verify the extracted folder has the same structure + assert!(unzip_folder.graph_path().unwrap().exists()); + assert!(unzip_folder.meta_path().unwrap().exists()); + + // Verify the extracted graph is the same as the original + let extracted_graph = Graph::decode(&unzip_folder).unwrap(); + assert_graph_equal(&graph, &extracted_graph); + } +} diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs deleted file mode 100644 index 58b2353972..0000000000 --- a/raphtory/src/serialise/graph_folder.rs +++ /dev/null @@ -1,870 +0,0 @@ -//! Raphtory container format for managing graph data. -//! -//! Folder structure: -//! -//! GraphFolder -//! ├── .raph # Metadata file (json: {path: "data{id}"}) pointing at the current data folder -//! └── data{id}/ # Data folder (incremental id for atomic replacement) -//! ├── .meta # Metadata file (json: {path: "graph{id}", meta: {}}) pointing at the current graph folder -//! ├── graph{id}/ # Graph data (incremental id for atomic replacement) -//! 
├── index/ # Search indexes (optional) -//! └── vectors/ # Vector embeddings (optional) - -use crate::{ - db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, - serialise::metadata::GraphMetadata, -}; -use itertools::Itertools; -use raphtory_api::core::{ - input::input_node::parse_u64_strict, - storage::graph_folder::{ - DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, GRAPH_PATH, INDEX_PATH, ROOT_META_PATH, - VECTORS_PATH, - }, -}; -use serde::{Deserialize, Serialize}; -use std::{ - fs::{self, File}, - io::{self, ErrorKind, Read, Seek, Write}, - path::{Path, PathBuf}, -}; -use walkdir::WalkDir; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; - -pub(crate) fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { - relative_path - .strip_prefix(prefix) // should have the prefix - .and_then(parse_u64_strict) // the remainder should be the id - .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; - Ok(()) -} - -fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { - let mut value = String::new(); - file.read_to_string(&mut value)?; - let path: RelativePath = serde_json::from_str(&value)?; - valid_path_pointer(&path.path, prefix)?; - Ok(path.path) -} - -pub fn read_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result, GraphError> { - let file = match File::open(base_path.join(file_name)) { - Ok(file) => file, - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Ok(None), - _ => Err(error.into()), - } - } - }; - let path = read_path_from_file(file, prefix)?; - Ok(Some(path)) -} - -pub fn make_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result { - let mut id = read_path_pointer(base_path, file_name, prefix)? 
- .and_then(|path| { - path.strip_prefix(prefix) - .and_then(|id| id.parse::().ok()) - }) - .map_or(0, |id| id + 1); - - let mut path = format!("{prefix}{id}"); - while base_path.join(&path).exists() { - id += 1; - path = format!("{prefix}{id}"); - } - Ok(path) -} - -pub fn read_or_default_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result { - Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) -} - -pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { - let file = zip.by_name(ROOT_META_PATH)?; - Ok(read_path_from_file(file, DATA_PATH)?) -} - -pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { - let mut path = get_zip_data_path(zip)?; - let graph_path = get_zip_graph_path_name(zip, path.clone())?; - path.push('/'); - path.push_str(&graph_path); - Ok(path) -} - -pub fn get_zip_graph_path_name( - zip: &mut ZipArchive, - mut data_path: String, -) -> Result { - data_path.push('/'); - data_path.push_str(GRAPH_META_PATH); - let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; - Ok(graph_path) -} - -pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { - let mut path = get_zip_data_path(zip)?; - path.push('/'); - path.push_str(GRAPH_META_PATH); - Ok(path) -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct RelativePath { - pub path: String, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct Metadata { - pub path: String, - pub meta: GraphMetadata, -} - -pub trait GraphPaths { - fn root(&self) -> &Path; - - fn root_meta_path(&self) -> PathBuf { - self.root().join(ROOT_META_PATH) - } - - fn data_path(&self) -> Result { - Ok(InnerGraphFolder { - path: self.root().join(self.relative_data_path()?), - }) - } - - fn vectors_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(VECTORS_PATH); - Ok(path) - } - - fn index_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(INDEX_PATH); - Ok(path) - } - 
- fn graph_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(self.relative_graph_path()?); - Ok(path) - } - - fn meta_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(GRAPH_META_PATH); - Ok(path) - } - - fn is_zip(&self) -> bool { - self.root().is_file() - } - - fn read_zip(&self) -> Result, GraphError> { - if self.is_zip() { - let file = File::open(self.root())?; - let archive = ZipArchive::new(file)?; - Ok(archive) - } else { - Err(GraphError::NotAZip) - } - } - - fn relative_data_path(&self) -> Result { - let path = if self.is_zip() { - let mut zip = self.read_zip()?; - get_zip_data_path(&mut zip)? - } else { - read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)? - }; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - let data_path = get_zip_data_path(&mut zip)?; - get_zip_graph_path_name(&mut zip, data_path) - } else { - let data_path = self.data_path()?; - read_or_default_path_pointer(data_path.as_ref(), GRAPH_META_PATH, GRAPH_PATH) - } - } - - fn read_metadata(&self) -> Result { - let mut json = String::new(); - if self.is_zip() { - let mut zip = self.read_zip()?; - let path = get_zip_meta_path(&mut zip)?; - let mut zip_file = zip.by_name(&path)?; - zip_file.read_to_string(&mut json)?; - } else { - let mut file = File::open(self.meta_path()?)?; - file.read_to_string(&mut json)?; - } - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - - fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { - let graph_path = self.relative_graph_path()?; - let metadata = GraphMetadata::from_graph(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; - let tmp_path = self.data_path()?.path.join(".tmp"); - let tmp_file = File::create(&tmp_path)?; - serde_json::to_writer(tmp_file, &meta)?; - let path = self.meta_path()?; - fs::rename(tmp_path, path)?; - Ok(()) - } - - /// 
Returns true if folder is occupied by a graph. - fn is_reserved(&self) -> bool { - self.meta_path().map_or(false, |path| path.exists()) - } - - /// Initialise the data folder and metadata pointer - fn init(&self) -> Result<(), GraphError> { - if self.root().is_dir() { - let non_empty = self.root().read_dir()?.next().is_some(); - if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.root().into())); - } - } else { - fs::create_dir_all(self.root())? - } - - // Create the data folder and have the root metadata file point to it. - let data_path = self.relative_data_path()?; - fs::create_dir(self.root().join(&data_path))?; - fs::write( - self.root_meta_path(), - serde_json::to_string(&RelativePath { path: data_path })?, - )?; - - // Create the graph folder inside the data folder. - let graph_path = self.graph_path()?; - fs::create_dir(&graph_path)?; - - Ok(()) - } -} - -impl + ?Sized> GraphPaths for P { - fn root(&self) -> &Path { - self.as_ref() - } -} - -#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] -pub struct GraphFolder { - root_folder: PathBuf, - pub(crate) write_as_zip_format: bool, -} - -impl GraphPaths for GraphFolder { - fn root(&self) -> &Path { - &self.root_folder - } -} - -impl GraphFolder { - pub fn new_as_zip(path: impl AsRef) -> Self { - let folder: GraphFolder = path.into(); - Self { - write_as_zip_format: true, - ..folder - } - } - - /// Reserve a folder, marking it as occupied by a graph. - /// Returns an error if the folder has data. 
- pub fn init_write(self) -> Result { - if self.write_as_zip_format { - return Err(GraphError::ZippedGraphCannotBeSwapped); - } - let relative_data_path = self.relative_data_path()?; - let meta = serde_json::to_string(&RelativePath { - path: relative_data_path.clone(), - })?; - self.ensure_clean_root_dir()?; - let metapath = self.root_folder.join(DIRTY_PATH); - let mut path_file = File::create_new(&metapath)?; - path_file.write_all(meta.as_bytes())?; - fs::create_dir_all(self.root_folder.join(relative_data_path))?; - Ok(WriteableGraphFolder { - path: self.root_folder, - }) - } - - /// Prepare a graph folder for atomically swapping the data contents. - /// This returns an error if the folder is set to write as Zip. - /// - /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and - /// the contents of the corresponding folder are deleted. - pub fn init_swap(self) -> Result { - if self.write_as_zip_format { - return Err(GraphError::ZippedGraphCannotBeSwapped); - } - let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { - Ok(path) => path, - Err(_) => { - fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up - None - } - }; - - fs::create_dir_all(self.root())?; - - let swap_path = match old_swap { - Some(relative_path) => { - let swap_path = self.root_folder.join(relative_path); - if swap_path.exists() { - fs::remove_dir_all(&swap_path)?; - } - swap_path - } - None => { - let new_relative_data_path = - make_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - let new_data_path = self.root_folder.join(&new_relative_data_path); - let meta = serde_json::to_string(&RelativePath { - path: new_relative_data_path, - })?; - let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; - dirty_file.write_all(meta.as_bytes())?; - dirty_file.sync_all()?; - new_data_path - } - }; - fs::create_dir_all(swap_path)?; - Ok(WriteableGraphFolder { - path: self.root_folder, - }) - } - 
- /// Clears the folder of any contents. - pub fn clear(&self) -> Result<(), GraphError> { - if self.is_zip() { - return Err(GraphError::IOErrorMsg( - "Cannot clear a zip folder".to_string(), - )); - } - - fs::remove_dir_all(&self.root_folder)?; - fs::create_dir_all(&self.root_folder)?; - Ok(()) - } - - pub fn get_zip_graph_prefix(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) - } else { - let data_path = read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - let graph_path = read_or_default_path_pointer( - &self.root().join(&data_path), - GRAPH_META_PATH, - GRAPH_PATH, - )?; - Ok([data_path, graph_path].join("/")) - } - } - - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { - if self.root_folder.exists() { - let non_empty = self.root_folder.read_dir()?.next().is_some(); - if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); - } - } else { - fs::create_dir(&self.root_folder)? - } - - Ok(()) - } - - pub fn is_disk_graph(&self) -> Result { - let meta = self.read_metadata()?; - Ok(meta.is_diskgraph) - } - - /// Creates a zip file from the folder. 
- pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { - if self.is_zip() { - let mut reader = File::open(&self.root_folder)?; - io::copy(&mut reader, &mut writer)?; - } else { - let mut zip = ZipWriter::new(writer); - for entry in WalkDir::new(&self.root_folder) - .into_iter() - .filter_map(Result::ok) - { - let path = entry.path(); - let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) - })?; - - let zip_entry_name = rel_path - .components() - .map(|name| name.as_os_str().to_string_lossy()) - .join("/"); - - if path.is_file() { - zip.start_file::<_, ()>(zip_entry_name, FileOptions::default())?; - - let mut file = File::open(path)?; - std::io::copy(&mut file, &mut zip)?; - } else if path.is_dir() && !zip_entry_name.is_empty() { - // Add empty directories to the zip - zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; - } - } - - zip.finish()?; - } - Ok(()) - } - - pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { - self.ensure_clean_root_dir()?; - let mut archive = ZipArchive::new(reader)?; - archive.extract(self.root())?; - Ok(()) - } -} - -#[must_use] -#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] -pub struct WriteableGraphFolder { - path: PathBuf, -} - -impl GraphPaths for WriteableGraphFolder { - fn root(&self) -> &Path { - &self.path - } - - fn relative_data_path(&self) -> Result { - let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? - .ok_or(GraphError::NoWriteInProgress)?; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - let path = - read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; - Ok(path) - } - - fn init(&self) -> Result<(), GraphError> { - Ok(()) - } -} - -impl WriteableGraphFolder { - /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' - /// and cleaning up any old data if it exists. 
- /// - /// This operation returns an error if there is no write in progress. - pub fn finish(self) -> Result { - let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - fs::rename( - self.root().join(DIRTY_PATH), - self.root().join(ROOT_META_PATH), - )?; - if let Some(old_data) = old_data { - let old_data_path = self.root().join(old_data); - if old_data_path.is_dir() { - fs::remove_dir_all(old_data_path)?; - } - } - Ok(GraphFolder { - root_folder: self.path, - write_as_zip_format: false, - }) - } -} - -#[derive(Clone, Debug)] -pub struct InnerGraphFolder { - path: PathBuf, -} - -impl AsRef for InnerGraphFolder { - fn as_ref(&self) -> &Path { - &self.path - } -} - -impl InnerGraphFolder { - pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { - let graph_path = self.relative_graph_path()?; - let metadata = GraphMetadata::from_graph(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; - let path = self.meta_path(); - let file = File::create(&path)?; - Ok(serde_json::to_writer(file, &meta)?) 
- } - - pub fn read_metadata(&self) -> Result { - let mut json = String::new(); - let mut file = File::open(self.meta_path())?; - file.read_to_string(&mut json)?; - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - - pub fn replace_graph( - &self, - graph: impl ParquetEncoder + GraphView + std::fmt::Debug, - ) -> Result<(), GraphError> { - let data_path = self.as_ref(); - let old_relative_graph_path = self.relative_graph_path()?; - let old_graph_path = self.path.join(&old_relative_graph_path); - let meta = GraphMetadata::from_graph(&graph); - let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; - graph.encode_parquet(data_path.join(&new_relative_graph_path))?; - - let dirty_path = data_path.join(DIRTY_PATH); - fs::write( - &dirty_path, - &serde_json::to_vec(&Metadata { - path: new_relative_graph_path.clone(), - meta, - })?, - )?; - fs::rename(&dirty_path, data_path.join(GRAPH_META_PATH))?; - if new_relative_graph_path != old_relative_graph_path { - fs::remove_dir_all(old_graph_path)?; - } - Ok(()) - } - pub fn vectors_path(&self) -> PathBuf { - self.path.join(VECTORS_PATH) - } - - pub fn index_path(&self) -> PathBuf { - self.path.join(INDEX_PATH) - } - - pub fn meta_path(&self) -> PathBuf { - self.path.join(GRAPH_META_PATH) - } - - pub fn relative_graph_path(&self) -> Result { - let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; - Ok(relative) - } - - pub fn graph_path(&self) -> Result { - Ok(self.path.join(self.relative_graph_path()?)) - } - - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { - if self.as_ref().exists() { - let non_empty = self.as_ref().read_dir()?.next().is_some(); - if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); - } - } else { - fs::create_dir_all(self)? - } - Ok(()) - } - - /// Extracts a zip file to the folder. 
- pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { - self.ensure_clean_root_dir()?; - - let mut zip = ZipArchive::new(reader)?; - let data_dir = get_zip_data_path(&mut zip)?; - - for i in 0..zip.len() { - let mut file = zip.by_index(i)?; - let zip_entry_name = match file.enclosed_name() { - Some(name) => name, - None => continue, - }; - if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { - let out_path = self.as_ref().join(inner_path); - if file.is_dir() { - std::fs::create_dir_all(&out_path)?; - } else { - // Create any parent directories - if let Some(parent) = out_path.parent() { - std::fs::create_dir_all(parent)?; - } - - let mut out_file = std::fs::File::create(&out_path)?; - std::io::copy(&mut file, &mut out_file)?; - } - } - } - - Ok(()) - } -} - -impl> From

for GraphFolder { - fn from(value: P) -> Self { - let path: &Path = value.as_ref(); - Self { - root_folder: path.to_path_buf(), - write_as_zip_format: false, - } - } -} - -impl From<&GraphFolder> for GraphFolder { - fn from(value: &GraphFolder) -> Self { - value.clone() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - db::graph::graph::assert_graph_equal, prelude::*, serialise::serialise::StableDecode, - }; - - // /// Verify that the metadata is re-created if it does not exist. - // #[test] - // #[ignore = "Need to think about how to deal with reading old format"] - // fn test_read_metadata_from_noninitialized_zip() { - // global_info_logger(); - // - // let graph = Graph::new(); - // graph.add_node(0, 0, NO_PROPS, None).unwrap(); - // - // let tmp_dir = tempfile::TempDir::new().unwrap(); - // let zip_path = tmp_dir.path().join("graph.zip"); - // let folder = GraphFolder::new_as_zip(&zip_path); - // graph.encode(&folder).unwrap(); - // - // // Remove the metadata file from the zip to simulate a noninitialized zip - // remove_metadata_from_zip(&zip_path); - // - // // Should fail because the metadata file is not present - // let err = folder.try_read_metadata(); - // assert!(err.is_err()); - // - // // Should re-create the metadata file - // let result = folder.read_metadata().unwrap(); - // assert_eq!( - // result, - // GraphMetadata { - // node_count: 1, - // edge_count: 0, - // metadata: vec![], - // graph_type: GraphType::EventGraph, - // is_diskgraph: false - // } - // ); - // } - - // /// Helper function to remove the metadata file from a zip - // fn remove_metadata_from_zip(zip_path: &Path) { - // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); - // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); - // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); - // - // // Scope for the zip writer - // { - // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); - // - // for i in 0..zip_archive.len() { - 
// let mut file = zip_archive.by_index(i).unwrap(); - // - // // Copy all files except the metadata file - // if file.name() != META_PATH { - // zip_writer - // .start_file::<_, ()>(file.name(), FileOptions::default()) - // .unwrap(); - // std::io::copy(&mut file, &mut zip_writer).unwrap(); - // } - // } - // - // zip_writer.finish().unwrap(); - // } - // - // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); - // } - - // /// Verify that the metadata is re-created if it does not exist. - // #[test] - // #[ignore = "Need to think about how to handle reading from old format"] - // fn test_read_metadata_from_noninitialized_folder() { - // global_info_logger(); - // - // let graph = Graph::new(); - // graph.add_node(0, 0, NO_PROPS, None).unwrap(); - // - // let temp_folder = tempfile::TempDir::new().unwrap(); - // let folder = GraphFolder::from(temp_folder.path()); - // graph.encode(&folder).unwrap(); - // - // // Remove the metadata file - // std::fs::remove_file(folder.get_meta_path()).unwrap(); - // - // // Should fail because the metadata file is not present - // let err = folder.try_read_metadata(); - // assert!(err.is_err()); - // - // // Should re-create the metadata file - // let result = folder.read_metadata().unwrap(); - // assert_eq!( - // result, - // GraphMetadata { - // node_count: 1, - // edge_count: 0, - // metadata: vec![], - // graph_type: GraphType::EventGraph, - // is_diskgraph: false - // } - // ); - // } - #[test] - fn test_zip_from_folder() { - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); - graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); - graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); - - // Create a regular folder and encode the graph - let temp_folder = tempfile::TempDir::new().unwrap(); - let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); - graph.encode(&initial_folder).unwrap(); - - assert!(initial_folder.graph_path().unwrap().exists()); - 
assert!(initial_folder.meta_path().unwrap().exists()); - - // Create a zip file from the folder - let output_zip_path = temp_folder.path().join("output.zip"); - let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); - initial_folder.zip_from_folder(output_zip_file).unwrap(); - - assert!(output_zip_path.exists()); - - // Verify the output zip contains the same graph - let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder).unwrap(); - - assert_graph_equal(&graph, &decoded_graph); - } - - #[test] - fn test_zip_from_zip() { - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); - graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); - graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); - - // Create an initial zip file - let temp_folder = tempfile::TempDir::new().unwrap(); - let initial_zip_path = temp_folder.path().join("initial.zip"); - let initial_folder = GraphFolder::new_as_zip(&initial_zip_path); - graph.encode(&initial_folder).unwrap(); - - assert!(initial_zip_path.exists()); - - // Create a new zip file from the existing zip - let output_zip_path = temp_folder.path().join("output.zip"); - let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); - initial_folder.zip_from_folder(output_zip_file).unwrap(); - - assert!(output_zip_path.exists()); - - // Verify zip file sizes - let initial_size = std::fs::metadata(&initial_zip_path).unwrap().len(); - let output_size = std::fs::metadata(&output_zip_path).unwrap().len(); - assert_eq!(initial_size, output_size); - - // Verify the output zip contains the same graph - let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder).unwrap(); - - assert_graph_equal(&graph, &decoded_graph); - } - - #[test] - fn test_unzip_to_folder() { - let graph = Graph::new(); - - graph - .add_edge(0, 0, 1, [("test prop 1", Prop::map(NO_PROPS))], None) - .unwrap(); - graph - .add_edge( 
- 1, - 2, - 3, - [("test prop 1", Prop::map([("key", "value")]))], - Some("layer_a"), - ) - .unwrap(); - graph - .add_edge(2, 3, 4, [("test prop 2", "value")], Some("layer_b")) - .unwrap(); - graph - .add_edge(3, 1, 4, [("test prop 3", 10.0)], None) - .unwrap(); - graph - .add_edge(4, 1, 3, [("test prop 4", true)], None) - .unwrap(); - - graph - .node(1) - .unwrap() - .add_updates(5, [("test node prop", 5i32)], None) - .unwrap(); - - let temp_folder = tempfile::TempDir::new().unwrap(); - let folder = temp_folder.path().join("graph"); - let graph_folder = GraphFolder::from(&folder); - - graph.encode(&graph_folder).unwrap(); - assert!(graph_folder.graph_path().unwrap().exists()); - - // Zip the folder - let mut zip_bytes = Vec::new(); - let cursor = std::io::Cursor::new(&mut zip_bytes); - graph_folder.zip_from_folder(cursor).unwrap(); - - // Unzip to a new folder - let folder = temp_folder.path().join("unzip"); - let unzip_folder = GraphFolder::from(&folder); - let cursor = std::io::Cursor::new(&zip_bytes); - unzip_folder.unzip_to_folder(cursor).unwrap(); - - // Verify the extracted folder has the same structure - assert!(unzip_folder.graph_path().unwrap().exists()); - assert!(unzip_folder.meta_path().unwrap().exists()); - - // Verify the extracted graph is the same as the original - let extracted_graph = Graph::decode(&unzip_folder).unwrap(); - assert_graph_equal(&graph, &extracted_graph); - } -} diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index 7b49f01200..93185d3328 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,9 +1,7 @@ -mod graph_folder; pub mod metadata; pub mod parquet; mod serialise; -pub use graph_folder::*; pub use serialise::{StableDecode, StableEncode}; From f2ce07109cadb951f24bbf8891ad5d0093df1b2f Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 15 Jun 2026 05:53:39 -0400 Subject: [PATCH 06/20] Revert "Instead of moving constants only, move the whole graph_folder.rs file over. 
Now, we have many import errors because things from raphtory can't be imported (like GraphError)" This reverts commit 2cab13ca35d256d4c500030d4c69e4ff74b2a6d2. --- raphtory-api/Cargo.toml | 2 - raphtory-api/src/core/storage/graph_folder.rs | 868 +---------------- raphtory/src/serialise/graph_folder.rs | 870 ++++++++++++++++++ raphtory/src/serialise/mod.rs | 2 + 4 files changed, 873 insertions(+), 869 deletions(-) create mode 100644 raphtory/src/serialise/graph_folder.rs diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 2062afeab4..14818af8fa 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -45,8 +45,6 @@ iter-enum = { workspace = true } minijinja = { workspace = true, optional = true } display-error-chain = { workspace = true, optional = true } indexmap.workspace = true -walkdir.workspace = true -zip.workspace = true [dev-dependencies] proptest.workspace = true diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index d02b775220..fd13d76d74 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -1,884 +1,18 @@ -//! Raphtory container format for managing graph data. -//! -//! Folder structure: -//! -//! GraphFolder -//! ├── .raph # Metadata file (json: {path: "data{id}"}) pointing at the current data folder -//! └── data{id}/ # Data folder (incremental id for atomic replacement) -//! ├── .meta # Metadata file (json: {path: "graph{id}", meta: {}}) pointing at the current graph folder -//! ├── graph{id}/ # Graph data (incremental id for atomic replacement) -//! ├── index/ # Search indexes (optional) -//! 
└── vectors/ # Vector embeddings (optional) - -use crate::core::input::input_node::parse_u64_strict; -use itertools::Itertools; -use raphtory::{ - db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, - serialise::metadata::GraphMetadata, -}; -use serde::{Deserialize, Serialize}; -use std::{ - fs::{self, File}, - io::{self, ErrorKind, Read, Seek, Write}, - path::{Path, PathBuf}, -}; -use walkdir::WalkDir; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; +//! const vars for file and directory names regarding exported graphs. /// Metadata file that stores path to the data folder. pub const ROOT_META_PATH: &str = ".raph"; - /// Outer most directory containing all data. pub const DATA_PATH: &str = "data"; pub const DEFAULT_DATA_PATH: &str = "data0"; - /// Metadata file that stores path to the graph folder and graph metadata. pub const GRAPH_META_PATH: &str = ".meta"; - /// Directory that stores graph data. pub const GRAPH_PATH: &str = "graph"; pub const DEFAULT_GRAPH_PATH: &str = "graph0"; - /// Directory that stores search indexes. pub const INDEX_PATH: &str = "index"; - /// Directory that stores vector embeddings of the graph. pub const VECTORS_PATH: &str = "vectors"; - /// Temporary metadata file for atomic replacement. 
pub const DIRTY_PATH: &str = ".dirty"; - -pub fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { - relative_path - .strip_prefix(prefix) // should have the prefix - .and_then(parse_u64_strict) // the remainder should be the id - .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; - Ok(()) -} - -fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { - let mut value = String::new(); - file.read_to_string(&mut value)?; - let path: RelativePath = serde_json::from_str(&value)?; - valid_path_pointer(&path.path, prefix)?; - Ok(path.path) -} - -pub fn read_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result, GraphError> { - let file = match File::open(base_path.join(file_name)) { - Ok(file) => file, - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Ok(None), - _ => Err(error.into()), - } - } - }; - let path = read_path_from_file(file, prefix)?; - Ok(Some(path)) -} - -pub fn make_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result { - let mut id = read_path_pointer(base_path, file_name, prefix)? - .and_then(|path| { - path.strip_prefix(prefix) - .and_then(|id| id.parse::().ok()) - }) - .map_or(0, |id| id + 1); - - let mut path = format!("{prefix}{id}"); - while base_path.join(&path).exists() { - id += 1; - path = format!("{prefix}{id}"); - } - Ok(path) -} - -pub fn read_or_default_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result { - Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) -} - -pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { - let file = zip.by_name(ROOT_META_PATH)?; - Ok(read_path_from_file(file, DATA_PATH)?) 
-} - -pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { - let mut path = get_zip_data_path(zip)?; - let graph_path = get_zip_graph_path_name(zip, path.clone())?; - path.push('/'); - path.push_str(&graph_path); - Ok(path) -} - -pub fn get_zip_graph_path_name( - zip: &mut ZipArchive, - mut data_path: String, -) -> Result { - data_path.push('/'); - data_path.push_str(GRAPH_META_PATH); - let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; - Ok(graph_path) -} - -pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { - let mut path = get_zip_data_path(zip)?; - path.push('/'); - path.push_str(GRAPH_META_PATH); - Ok(path) -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct RelativePath { - pub path: String, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct Metadata { - pub path: String, - pub meta: GraphMetadata, -} - -pub trait GraphPaths { - fn root(&self) -> &Path; - - fn root_meta_path(&self) -> PathBuf { - self.root().join(ROOT_META_PATH) - } - - fn data_path(&self) -> Result { - Ok(InnerGraphFolder { - path: self.root().join(self.relative_data_path()?), - }) - } - - fn vectors_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(VECTORS_PATH); - Ok(path) - } - - fn index_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(INDEX_PATH); - Ok(path) - } - - fn graph_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(self.relative_graph_path()?); - Ok(path) - } - - fn meta_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(GRAPH_META_PATH); - Ok(path) - } - - fn is_zip(&self) -> bool { - self.root().is_file() - } - - fn read_zip(&self) -> Result, GraphError> { - if self.is_zip() { - let file = File::open(self.root())?; - let archive = ZipArchive::new(file)?; - Ok(archive) - } else { - Err(GraphError::NotAZip) - } - } - - fn relative_data_path(&self) -> Result { - let path = if self.is_zip() { - let mut zip = 
self.read_zip()?; - get_zip_data_path(&mut zip)? - } else { - read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)? - }; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - let data_path = get_zip_data_path(&mut zip)?; - get_zip_graph_path_name(&mut zip, data_path) - } else { - let data_path = self.data_path()?; - read_or_default_path_pointer(data_path.as_ref(), GRAPH_META_PATH, GRAPH_PATH) - } - } - - fn read_metadata(&self) -> Result { - let mut json = String::new(); - if self.is_zip() { - let mut zip = self.read_zip()?; - let path = get_zip_meta_path(&mut zip)?; - let mut zip_file = zip.by_name(&path)?; - zip_file.read_to_string(&mut json)?; - } else { - let mut file = File::open(self.meta_path()?)?; - file.read_to_string(&mut json)?; - } - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - - fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { - let graph_path = self.relative_graph_path()?; - let metadata = GraphMetadata::from_graph(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; - let tmp_path = self.data_path()?.path.join(".tmp"); - let tmp_file = File::create(&tmp_path)?; - serde_json::to_writer(tmp_file, &meta)?; - let path = self.meta_path()?; - fs::rename(tmp_path, path)?; - Ok(()) - } - - /// Returns true if folder is occupied by a graph. - fn is_reserved(&self) -> bool { - self.meta_path().map_or(false, |path| path.exists()) - } - - /// Initialise the data folder and metadata pointer - fn init(&self) -> Result<(), GraphError> { - if self.root().is_dir() { - let non_empty = self.root().read_dir()?.next().is_some(); - if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.root().into())); - } - } else { - fs::create_dir_all(self.root())? - } - - // Create the data folder and have the root metadata file point to it. 
- let data_path = self.relative_data_path()?; - fs::create_dir(self.root().join(&data_path))?; - fs::write( - self.root_meta_path(), - serde_json::to_string(&RelativePath { path: data_path })?, - )?; - - // Create the graph folder inside the data folder. - let graph_path = self.graph_path()?; - fs::create_dir(&graph_path)?; - - Ok(()) - } -} - -impl + ?Sized> GraphPaths for P { - fn root(&self) -> &Path { - self.as_ref() - } -} - -#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] -pub struct GraphFolder { - root_folder: PathBuf, - pub write_as_zip_format: bool, -} - -impl GraphPaths for GraphFolder { - fn root(&self) -> &Path { - &self.root_folder - } -} - -impl GraphFolder { - pub fn new_as_zip(path: impl AsRef) -> Self { - let folder: GraphFolder = path.into(); - Self { - write_as_zip_format: true, - ..folder - } - } - - /// Reserve a folder, marking it as occupied by a graph. - /// Returns an error if the folder has data. - pub fn init_write(self) -> Result { - if self.write_as_zip_format { - return Err(GraphError::ZippedGraphCannotBeSwapped); - } - let relative_data_path = self.relative_data_path()?; - let meta = serde_json::to_string(&RelativePath { - path: relative_data_path.clone(), - })?; - self.ensure_clean_root_dir()?; - let metapath = self.root_folder.join(DIRTY_PATH); - let mut path_file = File::create_new(&metapath)?; - path_file.write_all(meta.as_bytes())?; - fs::create_dir_all(self.root_folder.join(relative_data_path))?; - Ok(WriteableGraphFolder { - path: self.root_folder, - }) - } - - /// Prepare a graph folder for atomically swapping the data contents. - /// This returns an error if the folder is set to write as Zip. - /// - /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and - /// the contents of the corresponding folder are deleted. 
- pub fn init_swap(self) -> Result { - if self.write_as_zip_format { - return Err(GraphError::ZippedGraphCannotBeSwapped); - } - let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { - Ok(path) => path, - Err(_) => { - fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up - None - } - }; - - fs::create_dir_all(self.root())?; - - let swap_path = match old_swap { - Some(relative_path) => { - let swap_path = self.root_folder.join(relative_path); - if swap_path.exists() { - fs::remove_dir_all(&swap_path)?; - } - swap_path - } - None => { - let new_relative_data_path = - make_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - let new_data_path = self.root_folder.join(&new_relative_data_path); - let meta = serde_json::to_string(&RelativePath { - path: new_relative_data_path, - })?; - let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; - dirty_file.write_all(meta.as_bytes())?; - dirty_file.sync_all()?; - new_data_path - } - }; - fs::create_dir_all(swap_path)?; - Ok(WriteableGraphFolder { - path: self.root_folder, - }) - } - - /// Clears the folder of any contents. 
- pub fn clear(&self) -> Result<(), GraphError> { - if self.is_zip() { - return Err(GraphError::IOErrorMsg( - "Cannot clear a zip folder".to_string(), - )); - } - - fs::remove_dir_all(&self.root_folder)?; - fs::create_dir_all(&self.root_folder)?; - Ok(()) - } - - pub fn get_zip_graph_prefix(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) - } else { - let data_path = read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - let graph_path = read_or_default_path_pointer( - &self.root().join(&data_path), - GRAPH_META_PATH, - GRAPH_PATH, - )?; - Ok([data_path, graph_path].join("/")) - } - } - - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { - if self.root_folder.exists() { - let non_empty = self.root_folder.read_dir()?.next().is_some(); - if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); - } - } else { - fs::create_dir(&self.root_folder)? - } - - Ok(()) - } - - pub fn is_disk_graph(&self) -> Result { - let meta = self.read_metadata()?; - Ok(meta.is_diskgraph) - } - - /// Creates a zip file from the folder. 
- pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { - if self.is_zip() { - let mut reader = File::open(&self.root_folder)?; - io::copy(&mut reader, &mut writer)?; - } else { - let mut zip = ZipWriter::new(writer); - for entry in WalkDir::new(&self.root_folder) - .into_iter() - .filter_map(Result::ok) - { - let path = entry.path(); - let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) - })?; - - let zip_entry_name = rel_path - .components() - .map(|name| name.as_os_str().to_string_lossy()) - .join("/"); - - if path.is_file() { - zip.start_file::<_, ()>(zip_entry_name, FileOptions::default())?; - - let mut file = File::open(path)?; - std::io::copy(&mut file, &mut zip)?; - } else if path.is_dir() && !zip_entry_name.is_empty() { - // Add empty directories to the zip - zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; - } - } - - zip.finish()?; - } - Ok(()) - } - - pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { - self.ensure_clean_root_dir()?; - let mut archive = ZipArchive::new(reader)?; - archive.extract(self.root())?; - Ok(()) - } -} - -#[must_use] -#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] -pub struct WriteableGraphFolder { - path: PathBuf, -} - -impl GraphPaths for WriteableGraphFolder { - fn root(&self) -> &Path { - &self.path - } - - fn relative_data_path(&self) -> Result { - let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? - .ok_or(GraphError::NoWriteInProgress)?; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - let path = - read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; - Ok(path) - } - - fn init(&self) -> Result<(), GraphError> { - Ok(()) - } -} - -impl WriteableGraphFolder { - /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' - /// and cleaning up any old data if it exists. 
- /// - /// This operation returns an error if there is no write in progress. - pub fn finish(self) -> Result { - let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - fs::rename( - self.root().join(DIRTY_PATH), - self.root().join(ROOT_META_PATH), - )?; - if let Some(old_data) = old_data { - let old_data_path = self.root().join(old_data); - if old_data_path.is_dir() { - fs::remove_dir_all(old_data_path)?; - } - } - Ok(GraphFolder { - root_folder: self.path, - write_as_zip_format: false, - }) - } -} - -#[derive(Clone, Debug)] -pub struct InnerGraphFolder { - path: PathBuf, -} - -impl AsRef for InnerGraphFolder { - fn as_ref(&self) -> &Path { - &self.path - } -} - -impl InnerGraphFolder { - pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { - let graph_path = self.relative_graph_path()?; - let metadata = GraphMetadata::from_graph(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; - let path = self.meta_path(); - let file = File::create(&path)?; - Ok(serde_json::to_writer(file, &meta)?) 
- } - - pub fn read_metadata(&self) -> Result { - let mut json = String::new(); - let mut file = File::open(self.meta_path())?; - file.read_to_string(&mut json)?; - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - - pub fn replace_graph( - &self, - graph: impl ParquetEncoder + GraphView + std::fmt::Debug, - ) -> Result<(), GraphError> { - let data_path = self.as_ref(); - let old_relative_graph_path = self.relative_graph_path()?; - let old_graph_path = self.path.join(&old_relative_graph_path); - let meta = GraphMetadata::from_graph(&graph); - let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; - graph.encode_parquet(data_path.join(&new_relative_graph_path))?; - - let dirty_path = data_path.join(DIRTY_PATH); - fs::write( - &dirty_path, - &serde_json::to_vec(&Metadata { - path: new_relative_graph_path.clone(), - meta, - })?, - )?; - fs::rename(&dirty_path, data_path.join(GRAPH_META_PATH))?; - if new_relative_graph_path != old_relative_graph_path { - fs::remove_dir_all(old_graph_path)?; - } - Ok(()) - } - pub fn vectors_path(&self) -> PathBuf { - self.path.join(VECTORS_PATH) - } - - pub fn index_path(&self) -> PathBuf { - self.path.join(INDEX_PATH) - } - - pub fn meta_path(&self) -> PathBuf { - self.path.join(GRAPH_META_PATH) - } - - pub fn relative_graph_path(&self) -> Result { - let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; - Ok(relative) - } - - pub fn graph_path(&self) -> Result { - Ok(self.path.join(self.relative_graph_path()?)) - } - - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { - if self.as_ref().exists() { - let non_empty = self.as_ref().read_dir()?.next().is_some(); - if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); - } - } else { - fs::create_dir_all(self)? - } - Ok(()) - } - - /// Extracts a zip file to the folder. 
- pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { - self.ensure_clean_root_dir()?; - - let mut zip = ZipArchive::new(reader)?; - let data_dir = get_zip_data_path(&mut zip)?; - - for i in 0..zip.len() { - let mut file = zip.by_index(i)?; - let zip_entry_name = match file.enclosed_name() { - Some(name) => name, - None => continue, - }; - if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { - let out_path = self.as_ref().join(inner_path); - if file.is_dir() { - std::fs::create_dir_all(&out_path)?; - } else { - // Create any parent directories - if let Some(parent) = out_path.parent() { - std::fs::create_dir_all(parent)?; - } - - let mut out_file = std::fs::File::create(&out_path)?; - std::io::copy(&mut file, &mut out_file)?; - } - } - } - - Ok(()) - } -} - -impl> From

for GraphFolder { - fn from(value: P) -> Self { - let path: &Path = value.as_ref(); - Self { - root_folder: path.to_path_buf(), - write_as_zip_format: false, - } - } -} - -impl From<&GraphFolder> for GraphFolder { - fn from(value: &GraphFolder) -> Self { - value.clone() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // /// Verify that the metadata is re-created if it does not exist. - // #[test] - // #[ignore = "Need to think about how to deal with reading old format"] - // fn test_read_metadata_from_noninitialized_zip() { - // global_info_logger(); - // - // let graph = Graph::new(); - // graph.add_node(0, 0, NO_PROPS, None).unwrap(); - // - // let tmp_dir = tempfile::TempDir::new().unwrap(); - // let zip_path = tmp_dir.path().join("graph.zip"); - // let folder = GraphFolder::new_as_zip(&zip_path); - // graph.encode(&folder).unwrap(); - // - // // Remove the metadata file from the zip to simulate a noninitialized zip - // remove_metadata_from_zip(&zip_path); - // - // // Should fail because the metadata file is not present - // let err = folder.try_read_metadata(); - // assert!(err.is_err()); - // - // // Should re-create the metadata file - // let result = folder.read_metadata().unwrap(); - // assert_eq!( - // result, - // GraphMetadata { - // node_count: 1, - // edge_count: 0, - // metadata: vec![], - // graph_type: GraphType::EventGraph, - // is_diskgraph: false - // } - // ); - // } - - // /// Helper function to remove the metadata file from a zip - // fn remove_metadata_from_zip(zip_path: &Path) { - // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); - // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); - // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); - // - // // Scope for the zip writer - // { - // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); - // - // for i in 0..zip_archive.len() { - // let mut file = zip_archive.by_index(i).unwrap(); - // - // // Copy all files except the metadata file - 
// if file.name() != META_PATH { - // zip_writer - // .start_file::<_, ()>(file.name(), FileOptions::default()) - // .unwrap(); - // std::io::copy(&mut file, &mut zip_writer).unwrap(); - // } - // } - // - // zip_writer.finish().unwrap(); - // } - // - // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); - // } - - // /// Verify that the metadata is re-created if it does not exist. - // #[test] - // #[ignore = "Need to think about how to handle reading from old format"] - // fn test_read_metadata_from_noninitialized_folder() { - // global_info_logger(); - // - // let graph = Graph::new(); - // graph.add_node(0, 0, NO_PROPS, None).unwrap(); - // - // let temp_folder = tempfile::TempDir::new().unwrap(); - // let folder = GraphFolder::from(temp_folder.path()); - // graph.encode(&folder).unwrap(); - // - // // Remove the metadata file - // std::fs::remove_file(folder.get_meta_path()).unwrap(); - // - // // Should fail because the metadata file is not present - // let err = folder.try_read_metadata(); - // assert!(err.is_err()); - // - // // Should re-create the metadata file - // let result = folder.read_metadata().unwrap(); - // assert_eq!( - // result, - // GraphMetadata { - // node_count: 1, - // edge_count: 0, - // metadata: vec![], - // graph_type: GraphType::EventGraph, - // is_diskgraph: false - // } - // ); - // } - #[test] - fn test_zip_from_folder() { - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); - graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); - graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); - - // Create a regular folder and encode the graph - let temp_folder = tempfile::TempDir::new().unwrap(); - let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); - graph.encode(&initial_folder).unwrap(); - - assert!(initial_folder.graph_path().unwrap().exists()); - assert!(initial_folder.meta_path().unwrap().exists()); - - // Create a zip file from the folder - let output_zip_path = 
temp_folder.path().join("output.zip"); - let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); - initial_folder.zip_from_folder(output_zip_file).unwrap(); - - assert!(output_zip_path.exists()); - - // Verify the output zip contains the same graph - let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder).unwrap(); - - assert_graph_equal(&graph, &decoded_graph); - } - - #[test] - fn test_zip_from_zip() { - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); - graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); - graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); - - // Create an initial zip file - let temp_folder = tempfile::TempDir::new().unwrap(); - let initial_zip_path = temp_folder.path().join("initial.zip"); - let initial_folder = GraphFolder::new_as_zip(&initial_zip_path); - graph.encode(&initial_folder).unwrap(); - - assert!(initial_zip_path.exists()); - - // Create a new zip file from the existing zip - let output_zip_path = temp_folder.path().join("output.zip"); - let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); - initial_folder.zip_from_folder(output_zip_file).unwrap(); - - assert!(output_zip_path.exists()); - - // Verify zip file sizes - let initial_size = std::fs::metadata(&initial_zip_path).unwrap().len(); - let output_size = std::fs::metadata(&output_zip_path).unwrap().len(); - assert_eq!(initial_size, output_size); - - // Verify the output zip contains the same graph - let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder).unwrap(); - - assert_graph_equal(&graph, &decoded_graph); - } - - #[test] - fn test_unzip_to_folder() { - let graph = Graph::new(); - - graph - .add_edge(0, 0, 1, [("test prop 1", Prop::map(NO_PROPS))], None) - .unwrap(); - graph - .add_edge( - 1, - 2, - 3, - [("test prop 1", Prop::map([("key", "value")]))], - Some("layer_a"), - ) - .unwrap(); - graph - 
.add_edge(2, 3, 4, [("test prop 2", "value")], Some("layer_b")) - .unwrap(); - graph - .add_edge(3, 1, 4, [("test prop 3", 10.0)], None) - .unwrap(); - graph - .add_edge(4, 1, 3, [("test prop 4", true)], None) - .unwrap(); - - graph - .node(1) - .unwrap() - .add_updates(5, [("test node prop", 5i32)], None) - .unwrap(); - - let temp_folder = tempfile::TempDir::new().unwrap(); - let folder = temp_folder.path().join("graph"); - let graph_folder = GraphFolder::from(&folder); - - graph.encode(&graph_folder).unwrap(); - assert!(graph_folder.graph_path().unwrap().exists()); - - // Zip the folder - let mut zip_bytes = Vec::new(); - let cursor = std::io::Cursor::new(&mut zip_bytes); - graph_folder.zip_from_folder(cursor).unwrap(); - - // Unzip to a new folder - let folder = temp_folder.path().join("unzip"); - let unzip_folder = GraphFolder::from(&folder); - let cursor = std::io::Cursor::new(&zip_bytes); - unzip_folder.unzip_to_folder(cursor).unwrap(); - - // Verify the extracted folder has the same structure - assert!(unzip_folder.graph_path().unwrap().exists()); - assert!(unzip_folder.meta_path().unwrap().exists()); - - // Verify the extracted graph is the same as the original - let extracted_graph = Graph::decode(&unzip_folder).unwrap(); - assert_graph_equal(&graph, &extracted_graph); - } -} diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs new file mode 100644 index 0000000000..58b2353972 --- /dev/null +++ b/raphtory/src/serialise/graph_folder.rs @@ -0,0 +1,870 @@ +//! Raphtory container format for managing graph data. +//! +//! Folder structure: +//! +//! GraphFolder +//! ├── .raph # Metadata file (json: {path: "data{id}"}) pointing at the current data folder +//! └── data{id}/ # Data folder (incremental id for atomic replacement) +//! ├── .meta # Metadata file (json: {path: "graph{id}", meta: {}}) pointing at the current graph folder +//! ├── graph{id}/ # Graph data (incremental id for atomic replacement) +//! 
├── index/ # Search indexes (optional) +//! └── vectors/ # Vector embeddings (optional) + +use crate::{ + db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, + serialise::metadata::GraphMetadata, +}; +use itertools::Itertools; +use raphtory_api::core::{ + input::input_node::parse_u64_strict, + storage::graph_folder::{ + DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, GRAPH_PATH, INDEX_PATH, ROOT_META_PATH, + VECTORS_PATH, + }, +}; +use serde::{Deserialize, Serialize}; +use std::{ + fs::{self, File}, + io::{self, ErrorKind, Read, Seek, Write}, + path::{Path, PathBuf}, +}; +use walkdir::WalkDir; +use zip::{write::FileOptions, ZipArchive, ZipWriter}; + +pub(crate) fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { + relative_path + .strip_prefix(prefix) // should have the prefix + .and_then(parse_u64_strict) // the remainder should be the id + .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; + Ok(()) +} + +fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { + let mut value = String::new(); + file.read_to_string(&mut value)?; + let path: RelativePath = serde_json::from_str(&value)?; + valid_path_pointer(&path.path, prefix)?; + Ok(path.path) +} + +pub fn read_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result, GraphError> { + let file = match File::open(base_path.join(file_name)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error.into()), + } + } + }; + let path = read_path_from_file(file, prefix)?; + Ok(Some(path)) +} + +pub fn make_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result { + let mut id = read_path_pointer(base_path, file_name, prefix)? 
+ .and_then(|path| { + path.strip_prefix(prefix) + .and_then(|id| id.parse::().ok()) + }) + .map_or(0, |id| id + 1); + + let mut path = format!("{prefix}{id}"); + while base_path.join(&path).exists() { + id += 1; + path = format!("{prefix}{id}"); + } + Ok(path) +} + +pub fn read_or_default_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result { + Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) +} + +pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { + let file = zip.by_name(ROOT_META_PATH)?; + Ok(read_path_from_file(file, DATA_PATH)?) +} + +pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + let graph_path = get_zip_graph_path_name(zip, path.clone())?; + path.push('/'); + path.push_str(&graph_path); + Ok(path) +} + +pub fn get_zip_graph_path_name( + zip: &mut ZipArchive, + mut data_path: String, +) -> Result { + data_path.push('/'); + data_path.push_str(GRAPH_META_PATH); + let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; + Ok(graph_path) +} + +pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + path.push('/'); + path.push_str(GRAPH_META_PATH); + Ok(path) +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct RelativePath { + pub path: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + pub path: String, + pub meta: GraphMetadata, +} + +pub trait GraphPaths { + fn root(&self) -> &Path; + + fn root_meta_path(&self) -> PathBuf { + self.root().join(ROOT_META_PATH) + } + + fn data_path(&self) -> Result { + Ok(InnerGraphFolder { + path: self.root().join(self.relative_data_path()?), + }) + } + + fn vectors_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(VECTORS_PATH); + Ok(path) + } + + fn index_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(INDEX_PATH); + Ok(path) + } + 
+ fn graph_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(self.relative_graph_path()?); + Ok(path) + } + + fn meta_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(GRAPH_META_PATH); + Ok(path) + } + + fn is_zip(&self) -> bool { + self.root().is_file() + } + + fn read_zip(&self) -> Result, GraphError> { + if self.is_zip() { + let file = File::open(self.root())?; + let archive = ZipArchive::new(file)?; + Ok(archive) + } else { + Err(GraphError::NotAZip) + } + } + + fn relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = self.read_zip()?; + get_zip_data_path(&mut zip)? + } else { + read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)? + }; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.data_path()?; + read_or_default_path_pointer(data_path.as_ref(), GRAPH_META_PATH, GRAPH_PATH) + } + } + + fn read_metadata(&self) -> Result { + let mut json = String::new(); + if self.is_zip() { + let mut zip = self.read_zip()?; + let path = get_zip_meta_path(&mut zip)?; + let mut zip_file = zip.by_name(&path)?; + zip_file.read_to_string(&mut json)?; + } else { + let mut file = File::open(self.meta_path()?)?; + file.read_to_string(&mut json)?; + } + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let tmp_path = self.data_path()?.path.join(".tmp"); + let tmp_file = File::create(&tmp_path)?; + serde_json::to_writer(tmp_file, &meta)?; + let path = self.meta_path()?; + fs::rename(tmp_path, path)?; + Ok(()) + } + + /// 
Returns true if folder is occupied by a graph. + fn is_reserved(&self) -> bool { + self.meta_path().map_or(false, |path| path.exists()) + } + + /// Initialise the data folder and metadata pointer + fn init(&self) -> Result<(), GraphError> { + if self.root().is_dir() { + let non_empty = self.root().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.root().into())); + } + } else { + fs::create_dir_all(self.root())? + } + + // Create the data folder and have the root metadata file point to it. + let data_path = self.relative_data_path()?; + fs::create_dir(self.root().join(&data_path))?; + fs::write( + self.root_meta_path(), + serde_json::to_string(&RelativePath { path: data_path })?, + )?; + + // Create the graph folder inside the data folder. + let graph_path = self.graph_path()?; + fs::create_dir(&graph_path)?; + + Ok(()) + } +} + +impl + ?Sized> GraphPaths for P { + fn root(&self) -> &Path { + self.as_ref() + } +} + +#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] +pub struct GraphFolder { + root_folder: PathBuf, + pub(crate) write_as_zip_format: bool, +} + +impl GraphPaths for GraphFolder { + fn root(&self) -> &Path { + &self.root_folder + } +} + +impl GraphFolder { + pub fn new_as_zip(path: impl AsRef) -> Self { + let folder: GraphFolder = path.into(); + Self { + write_as_zip_format: true, + ..folder + } + } + + /// Reserve a folder, marking it as occupied by a graph. + /// Returns an error if the folder has data. 
+ pub fn init_write(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); + } + let relative_data_path = self.relative_data_path()?; + let meta = serde_json::to_string(&RelativePath { + path: relative_data_path.clone(), + })?; + self.ensure_clean_root_dir()?; + let metapath = self.root_folder.join(DIRTY_PATH); + let mut path_file = File::create_new(&metapath)?; + path_file.write_all(meta.as_bytes())?; + fs::create_dir_all(self.root_folder.join(relative_data_path))?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } + + /// Prepare a graph folder for atomically swapping the data contents. + /// This returns an error if the folder is set to write as Zip. + /// + /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and + /// the contents of the corresponding folder are deleted. + pub fn init_swap(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); + } + let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { + Ok(path) => path, + Err(_) => { + fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up + None + } + }; + + fs::create_dir_all(self.root())?; + + let swap_path = match old_swap { + Some(relative_path) => { + let swap_path = self.root_folder.join(relative_path); + if swap_path.exists() { + fs::remove_dir_all(&swap_path)?; + } + swap_path + } + None => { + let new_relative_data_path = + make_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + let new_data_path = self.root_folder.join(&new_relative_data_path); + let meta = serde_json::to_string(&RelativePath { + path: new_relative_data_path, + })?; + let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; + dirty_file.write_all(meta.as_bytes())?; + dirty_file.sync_all()?; + new_data_path + } + }; + fs::create_dir_all(swap_path)?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } + 
+ /// Clears the folder of any contents. + pub fn clear(&self) -> Result<(), GraphError> { + if self.is_zip() { + return Err(GraphError::IOErrorMsg( + "Cannot clear a zip folder".to_string(), + )); + } + + fs::remove_dir_all(&self.root_folder)?; + fs::create_dir_all(&self.root_folder)?; + Ok(()) + } + + pub fn get_zip_graph_prefix(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) + } else { + let data_path = read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + let graph_path = read_or_default_path_pointer( + &self.root().join(&data_path), + GRAPH_META_PATH, + GRAPH_PATH, + )?; + Ok([data_path, graph_path].join("/")) + } + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + if self.root_folder.exists() { + let non_empty = self.root_folder.read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); + } + } else { + fs::create_dir(&self.root_folder)? + } + + Ok(()) + } + + pub fn is_disk_graph(&self) -> Result { + let meta = self.read_metadata()?; + Ok(meta.is_diskgraph) + } + + /// Creates a zip file from the folder. 
+ pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { + if self.is_zip() { + let mut reader = File::open(&self.root_folder)?; + io::copy(&mut reader, &mut writer)?; + } else { + let mut zip = ZipWriter::new(writer); + for entry in WalkDir::new(&self.root_folder) + .into_iter() + .filter_map(Result::ok) + { + let path = entry.path(); + let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { + GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) + })?; + + let zip_entry_name = rel_path + .components() + .map(|name| name.as_os_str().to_string_lossy()) + .join("/"); + + if path.is_file() { + zip.start_file::<_, ()>(zip_entry_name, FileOptions::default())?; + + let mut file = File::open(path)?; + std::io::copy(&mut file, &mut zip)?; + } else if path.is_dir() && !zip_entry_name.is_empty() { + // Add empty directories to the zip + zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; + } + } + + zip.finish()?; + } + Ok(()) + } + + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + self.ensure_clean_root_dir()?; + let mut archive = ZipArchive::new(reader)?; + archive.extract(self.root())?; + Ok(()) + } +} + +#[must_use] +#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +pub struct WriteableGraphFolder { + path: PathBuf, +} + +impl GraphPaths for WriteableGraphFolder { + fn root(&self) -> &Path { + &self.path + } + + fn relative_data_path(&self) -> Result { + let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? + .ok_or(GraphError::NoWriteInProgress)?; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + let path = + read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; + Ok(path) + } + + fn init(&self) -> Result<(), GraphError> { + Ok(()) + } +} + +impl WriteableGraphFolder { + /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' + /// and cleaning up any old data if it exists. 
+ /// + /// This operation returns an error if there is no write in progress. + pub fn finish(self) -> Result { + let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + fs::rename( + self.root().join(DIRTY_PATH), + self.root().join(ROOT_META_PATH), + )?; + if let Some(old_data) = old_data { + let old_data_path = self.root().join(old_data); + if old_data_path.is_dir() { + fs::remove_dir_all(old_data_path)?; + } + } + Ok(GraphFolder { + root_folder: self.path, + write_as_zip_format: false, + }) + } +} + +#[derive(Clone, Debug)] +pub struct InnerGraphFolder { + path: PathBuf, +} + +impl AsRef for InnerGraphFolder { + fn as_ref(&self) -> &Path { + &self.path + } +} + +impl InnerGraphFolder { + pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let path = self.meta_path(); + let file = File::create(&path)?; + Ok(serde_json::to_writer(file, &meta)?) 
+ } + + pub fn read_metadata(&self) -> Result { + let mut json = String::new(); + let mut file = File::open(self.meta_path())?; + file.read_to_string(&mut json)?; + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + pub fn replace_graph( + &self, + graph: impl ParquetEncoder + GraphView + std::fmt::Debug, + ) -> Result<(), GraphError> { + let data_path = self.as_ref(); + let old_relative_graph_path = self.relative_graph_path()?; + let old_graph_path = self.path.join(&old_relative_graph_path); + let meta = GraphMetadata::from_graph(&graph); + let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; + graph.encode_parquet(data_path.join(&new_relative_graph_path))?; + + let dirty_path = data_path.join(DIRTY_PATH); + fs::write( + &dirty_path, + &serde_json::to_vec(&Metadata { + path: new_relative_graph_path.clone(), + meta, + })?, + )?; + fs::rename(&dirty_path, data_path.join(GRAPH_META_PATH))?; + if new_relative_graph_path != old_relative_graph_path { + fs::remove_dir_all(old_graph_path)?; + } + Ok(()) + } + pub fn vectors_path(&self) -> PathBuf { + self.path.join(VECTORS_PATH) + } + + pub fn index_path(&self) -> PathBuf { + self.path.join(INDEX_PATH) + } + + pub fn meta_path(&self) -> PathBuf { + self.path.join(GRAPH_META_PATH) + } + + pub fn relative_graph_path(&self) -> Result { + let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; + Ok(relative) + } + + pub fn graph_path(&self) -> Result { + Ok(self.path.join(self.relative_graph_path()?)) + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + if self.as_ref().exists() { + let non_empty = self.as_ref().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); + } + } else { + fs::create_dir_all(self)? + } + Ok(()) + } + + /// Extracts a zip file to the folder. 
+ pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + self.ensure_clean_root_dir()?; + + let mut zip = ZipArchive::new(reader)?; + let data_dir = get_zip_data_path(&mut zip)?; + + for i in 0..zip.len() { + let mut file = zip.by_index(i)?; + let zip_entry_name = match file.enclosed_name() { + Some(name) => name, + None => continue, + }; + if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { + let out_path = self.as_ref().join(inner_path); + if file.is_dir() { + std::fs::create_dir_all(&out_path)?; + } else { + // Create any parent directories + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } + + let mut out_file = std::fs::File::create(&out_path)?; + std::io::copy(&mut file, &mut out_file)?; + } + } + } + + Ok(()) + } +} + +impl> From

for GraphFolder { + fn from(value: P) -> Self { + let path: &Path = value.as_ref(); + Self { + root_folder: path.to_path_buf(), + write_as_zip_format: false, + } + } +} + +impl From<&GraphFolder> for GraphFolder { + fn from(value: &GraphFolder) -> Self { + value.clone() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + db::graph::graph::assert_graph_equal, prelude::*, serialise::serialise::StableDecode, + }; + + // /// Verify that the metadata is re-created if it does not exist. + // #[test] + // #[ignore = "Need to think about how to deal with reading old format"] + // fn test_read_metadata_from_noninitialized_zip() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let tmp_dir = tempfile::TempDir::new().unwrap(); + // let zip_path = tmp_dir.path().join("graph.zip"); + // let folder = GraphFolder::new_as_zip(&zip_path); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file from the zip to simulate a noninitialized zip + // remove_metadata_from_zip(&zip_path); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } + + // /// Helper function to remove the metadata file from a zip + // fn remove_metadata_from_zip(zip_path: &Path) { + // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); + // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); + // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); + // + // // Scope for the zip writer + // { + // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); + // + // for i in 0..zip_archive.len() { + 
// let mut file = zip_archive.by_index(i).unwrap(); + // + // // Copy all files except the metadata file + // if file.name() != META_PATH { + // zip_writer + // .start_file::<_, ()>(file.name(), FileOptions::default()) + // .unwrap(); + // std::io::copy(&mut file, &mut zip_writer).unwrap(); + // } + // } + // + // zip_writer.finish().unwrap(); + // } + // + // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); + // } + + // /// Verify that the metadata is re-created if it does not exist. + // #[test] + // #[ignore = "Need to think about how to handle reading from old format"] + // fn test_read_metadata_from_noninitialized_folder() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let temp_folder = tempfile::TempDir::new().unwrap(); + // let folder = GraphFolder::from(temp_folder.path()); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file + // std::fs::remove_file(folder.get_meta_path()).unwrap(); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } + #[test] + fn test_zip_from_folder() { + let graph = Graph::new(); + graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); + graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); + graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); + + // Create a regular folder and encode the graph + let temp_folder = tempfile::TempDir::new().unwrap(); + let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); + graph.encode(&initial_folder).unwrap(); + + assert!(initial_folder.graph_path().unwrap().exists()); + 
assert!(initial_folder.meta_path().unwrap().exists()); + + // Create a zip file from the folder + let output_zip_path = temp_folder.path().join("output.zip"); + let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); + initial_folder.zip_from_folder(output_zip_file).unwrap(); + + assert!(output_zip_path.exists()); + + // Verify the output zip contains the same graph + let zip_folder = GraphFolder::new_as_zip(&output_zip_path); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); + + assert_graph_equal(&graph, &decoded_graph); + } + + #[test] + fn test_zip_from_zip() { + let graph = Graph::new(); + graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); + graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); + graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); + + // Create an initial zip file + let temp_folder = tempfile::TempDir::new().unwrap(); + let initial_zip_path = temp_folder.path().join("initial.zip"); + let initial_folder = GraphFolder::new_as_zip(&initial_zip_path); + graph.encode(&initial_folder).unwrap(); + + assert!(initial_zip_path.exists()); + + // Create a new zip file from the existing zip + let output_zip_path = temp_folder.path().join("output.zip"); + let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); + initial_folder.zip_from_folder(output_zip_file).unwrap(); + + assert!(output_zip_path.exists()); + + // Verify zip file sizes + let initial_size = std::fs::metadata(&initial_zip_path).unwrap().len(); + let output_size = std::fs::metadata(&output_zip_path).unwrap().len(); + assert_eq!(initial_size, output_size); + + // Verify the output zip contains the same graph + let zip_folder = GraphFolder::new_as_zip(&output_zip_path); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); + + assert_graph_equal(&graph, &decoded_graph); + } + + #[test] + fn test_unzip_to_folder() { + let graph = Graph::new(); + + graph + .add_edge(0, 0, 1, [("test prop 1", Prop::map(NO_PROPS))], None) + .unwrap(); + graph + .add_edge( 
+ 1, + 2, + 3, + [("test prop 1", Prop::map([("key", "value")]))], + Some("layer_a"), + ) + .unwrap(); + graph + .add_edge(2, 3, 4, [("test prop 2", "value")], Some("layer_b")) + .unwrap(); + graph + .add_edge(3, 1, 4, [("test prop 3", 10.0)], None) + .unwrap(); + graph + .add_edge(4, 1, 3, [("test prop 4", true)], None) + .unwrap(); + + graph + .node(1) + .unwrap() + .add_updates(5, [("test node prop", 5i32)], None) + .unwrap(); + + let temp_folder = tempfile::TempDir::new().unwrap(); + let folder = temp_folder.path().join("graph"); + let graph_folder = GraphFolder::from(&folder); + + graph.encode(&graph_folder).unwrap(); + assert!(graph_folder.graph_path().unwrap().exists()); + + // Zip the folder + let mut zip_bytes = Vec::new(); + let cursor = std::io::Cursor::new(&mut zip_bytes); + graph_folder.zip_from_folder(cursor).unwrap(); + + // Unzip to a new folder + let folder = temp_folder.path().join("unzip"); + let unzip_folder = GraphFolder::from(&folder); + let cursor = std::io::Cursor::new(&zip_bytes); + unzip_folder.unzip_to_folder(cursor).unwrap(); + + // Verify the extracted folder has the same structure + assert!(unzip_folder.graph_path().unwrap().exists()); + assert!(unzip_folder.meta_path().unwrap().exists()); + + // Verify the extracted graph is the same as the original + let extracted_graph = Graph::decode(&unzip_folder).unwrap(); + assert_graph_equal(&graph, &extracted_graph); + } +} diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index 93185d3328..7b49f01200 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,7 +1,9 @@ +mod graph_folder; pub mod metadata; pub mod parquet; mod serialise; +pub use graph_folder::*; pub use serialise::{StableDecode, StableEncode}; From 8b826a9dbb7ff63334e3c8a7f281cf34e42b0fb2 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 15 Jun 2026 10:30:27 -0400 Subject: [PATCH 07/20] Run rustfmt --- raphtory-graphql/src/lib.rs | 3 - 
raphtory-graphql/src/model/graph/filtering.rs | 43 +++++-- raphtory-tests/tests/test_filters.rs | 46 ++++---- raphtory/src/db/api/state/ops/filter.rs | 22 ++-- .../graph/views/filter/model/degree_filter.rs | 105 +++++++++++------- .../src/db/graph/views/filter/model/mod.rs | 3 +- .../views/filter/model/node_filter/mod.rs | 45 +++++--- .../src/python/filter/node_filter_builders.rs | 12 +- 8 files changed, 174 insertions(+), 105 deletions(-) diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 184f6a0049..1a3f2acfde 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -536,7 +536,6 @@ mod graphql_test { graph } - fn degree_graph_with_add_edge_only() -> Graph { let graph = Graph::new(); @@ -646,8 +645,6 @@ mod graphql_test { ); } - - #[tokio::test] async fn test_unique_temporal_properties() { let g = Graph::new(); diff --git a/raphtory-graphql/src/model/graph/filtering.rs b/raphtory-graphql/src/model/graph/filtering.rs index 3033016e1b..2ab6872c9c 100644 --- a/raphtory-graphql/src/model/graph/filtering.rs +++ b/raphtory-graphql/src/model/graph/filtering.rs @@ -1,4 +1,7 @@ -use crate::model::{graph::{node_id::GqlNodeId, property::Value, timeindex::GqlTimeInput}, plugins::operation}; +use crate::model::{ + graph::{node_id::GqlNodeId, property::Value, timeindex::GqlTimeInput}, + plugins::operation, +}; use async_graphql::dynamic::ValueAccessor; use dynamic_graphql::{ internal::{ @@ -7,16 +10,35 @@ use dynamic_graphql::{ Enum, InputObject, OneOfInput, }; use raphtory::{ - db::{api::{state::ops::Degree, view::internal::filtered_edge}, graph::views::filter::model::{ - ComposableFilter, DynFilter, DynView, NoFilter, ViewWrapOps, degree_filter::DegreeFilter, edge_filter::{CompositeEdgeFilter, EdgeFilter}, filter::{Filter, FilterValue}, filter_operator::FilterOperator, graph_filter::GraphFilter, is_active_edge_filter::IsActiveEdge, is_active_node_filter::IsActiveNode, is_deleted_filter::IsDeletedEdge, 
is_self_loop_filter::IsSelfLoopEdge, is_valid_filter::IsValidEdge, latest_filter::Latest as LatestWrap, layered_filter::Layered, node_filter::{CompositeNodeFilter, NodeFilter}, property_filter::{Op, PropertyFilter, PropertyFilterValue, PropertyRef}, snapshot_filter::{SnapshotAt as SnapshotAtWrap, SnapshotLatest as SnapshotLatestWrap}, windowed_filter::Windowed - }}, + db::{ + api::{state::ops::Degree, view::internal::filtered_edge}, + graph::views::filter::model::{ + degree_filter::DegreeFilter, + edge_filter::{CompositeEdgeFilter, EdgeFilter}, + filter::{Filter, FilterValue}, + filter_operator::FilterOperator, + graph_filter::GraphFilter, + is_active_edge_filter::IsActiveEdge, + is_active_node_filter::IsActiveNode, + is_deleted_filter::IsDeletedEdge, + is_self_loop_filter::IsSelfLoopEdge, + is_valid_filter::IsValidEdge, + latest_filter::Latest as LatestWrap, + layered_filter::Layered, + node_filter::{CompositeNodeFilter, NodeFilter}, + property_filter::{Op, PropertyFilter, PropertyFilterValue, PropertyRef}, + snapshot_filter::{SnapshotAt as SnapshotAtWrap, SnapshotLatest as SnapshotLatestWrap}, + windowed_filter::Windowed, + ComposableFilter, DynFilter, DynView, NoFilter, ViewWrapOps, + }, + }, errors::GraphError, }; -use raphtory_api::core::Direction; use raphtory_api::core::{ entities::{properties::prop::Prop, Layer, GID}, storage::timeindex::{AsTime, EventTime}, utils::time::IntoTime, + Direction, }; use serde::{Deserialize, Serialize}; use std::{ @@ -304,7 +326,6 @@ pub struct PropertyFilterNew { pub where_: PropCondition, } - /// Filters nodes by computed degree with a directional scope. 
/// /// `DegreeFilterNew` lets callers filter on: @@ -330,8 +351,8 @@ pub enum DegreeDirection { impl From for Direction { fn from(d: DegreeDirection) -> Self { match d { - DegreeDirection::In => Direction::IN, - DegreeDirection::Out => Direction::OUT, + DegreeDirection::In => Direction::IN, + DegreeDirection::Out => Direction::OUT, DegreeDirection::Both => Direction::BOTH, } } @@ -342,7 +363,7 @@ impl From for String { match d { DegreeDirection::In => "in_degree".to_string(), DegreeDirection::Out => "out_degree".to_string(), - DegreeDirection::Both => "degree".to_string(), + DegreeDirection::Both => "degree".to_string(), } } } @@ -1440,8 +1461,8 @@ impl TryFrom for CompositeNodeFilter { direction: core_direction, operator, value, - ops - })) + ops, + })) } GqlNodeFilter::Property(prop) => { let prop_ref = PropertyRef::Property(prop.name.clone()); diff --git a/raphtory-tests/tests/test_filters.rs b/raphtory-tests/tests/test_filters.rs index a4bc92771c..5976a103ba 100644 --- a/raphtory-tests/tests/test_filters.rs +++ b/raphtory-tests/tests/test_filters.rs @@ -1647,32 +1647,38 @@ fn init_edges_graph_with_str_ids_del< mod test_node_filter { -use crate::{ + use crate::{ init_nodes_graph, init_nodes_graph_with_num_ids, init_nodes_graph_with_str_ids, IdentityGraphTransformer, }; + use proptest::proptest; use raphtory::{ - algorithms::alternating_mask::alternating_mask, core::entities::VID, db::{ - api::view::{Filter, filter_ops::NodeSelect}, - graph::{ - views::filter::{ - CreateFilter, model::{ - ComposableFilter, CompositeNodeFilter, NodeViewFilterOps, PropertyFilterFactory, TryAsCompositeFilter, ViewWrapOps, degree_filter::DegreeFilterFactory, node_filter::ops::{NodeFilterOps, NodeIdFilterOps}, property_filter::ops::{ListAggOps, PropertyFilterOps} - } + algorithms::alternating_mask::alternating_mask, + core::entities::VID, + db::{ + api::view::{filter_ops::NodeSelect, Filter}, + graph::views::filter::{ + model::{ + degree_filter::DegreeFilterFactory, + 
node_filter::ops::{NodeFilterOps, NodeIdFilterOps}, + property_filter::ops::{ElemQualifierOps, ListAggOps, PropertyFilterOps}, + ComposableFilter, CompositeNodeFilter, NodeViewFilterOps, + PropertyFilterFactory, TryAsCompositeFilter, ViewWrapOps, }, + CreateFilter, }, - }, errors::GraphError, prelude::{ - AdditionOps, Graph, GraphViewOps, NO_PROPS, NodeFilter, NodeStateOps, NodeViewOps, TimeOps - } + }, + errors::GraphError, + prelude::{ + AdditionOps, Graph, GraphViewOps, IntoProp, NodeFilter, NodeStateOps, NodeViewOps, + TimeOps, NO_PROPS, + }, }; + use raphtory_api::core::{entities::properties::prop::Prop, Direction}; use raphtory_tests::assertions::{ assert_filter_nodes_results, assert_search_nodes_results, assert_select_nodes_results, TestVariants, }; - use raphtory_api::core::{Direction, entities::properties::prop::Prop}; - use raphtory::prelude::IntoProp; - use raphtory::db::graph::views::filter::model::property_filter::ops::ElemQualifierOps; - use proptest::proptest; fn sort_vids(mut vids: Vec) -> Vec { vids.sort(); @@ -1717,7 +1723,8 @@ use crate::{ .map(|n| n.node) .collect::>(); - let expected_filter_nodes = candidates_with_history_after_filtering(graph, expected_select_nodes.clone()); + let expected_filter_nodes = + candidates_with_history_after_filtering(graph, expected_select_nodes.clone()); let filtered_event_graph = graph.filter(filter.clone()).unwrap(); let filtered_event_nodes = sort_vids( @@ -1799,7 +1806,6 @@ use crate::{ graph } - fn degree_graph_with_add_edge_only() -> Graph { let graph = Graph::new(); @@ -1844,7 +1850,6 @@ use crate::{ graph } - // Property-based tests for degree filtering proptest! 
{ #[test] @@ -2033,7 +2038,7 @@ use crate::{ |d| d > threshold as usize && d < (threshold + 5) as usize, &format!("OUT > {} AND OUT < {}", threshold, threshold + 5), ); - } + } #[test] fn prop_degree_filter_or(threshold in 0u64..15) { @@ -2158,7 +2163,7 @@ use crate::{ &format!("OUT is_not_in({}, {})", val1, val2), ); } - } + } #[test] fn test_degree_filter_with_invalid_expressions() { @@ -13021,4 +13026,3 @@ mod test_edge_composite_filter { ); } } - diff --git a/raphtory/src/db/api/state/ops/filter.rs b/raphtory/src/db/api/state/ops/filter.rs index 11c5a90747..25e4b70d47 100644 --- a/raphtory/src/db/api/state/ops/filter.rs +++ b/raphtory/src/db/api/state/ops/filter.rs @@ -2,18 +2,25 @@ use crate::{ db::{ api::{ state::{ - Index, ops::{Const, Degree, IntoDynNodeOp, NodeOp, TypeId} + ops::{Const, Degree, IntoDynNodeOp, NodeOp, TypeId}, + Index, }, view::internal::{GraphView, NodeList}, }, graph::{ create_node_type_filter, - views::filter::model::{FilterOperator, degree_filter::DegreeFilter, filter::{Filter, FilterValue}, node_filter::NodeFilter, property_filter::PropertyFilterValue}, + views::filter::model::{ + degree_filter::DegreeFilter, + filter::{Filter, FilterValue}, + node_filter::NodeFilter, + property_filter::PropertyFilterValue, + FilterOperator, + }, }, }, prelude::{GraphViewOps, PropertyFilter}, }; -use raphtory_api::core::entities::{VID, properties::prop::Prop}; +use raphtory_api::core::entities::{properties::prop::Prop, VID}; use raphtory_core::entities::nodes::node_ref::AsNodeRef; use raphtory_storage::graph::{graph::GraphStorage, nodes::node_storage_ops::NodeStorageOps}; use std::sync::Arc; @@ -225,19 +232,19 @@ impl NodeOp for NodePropertyFilterOp { pub struct NodeDegreeFilterOp { degree: Degree, operator: FilterOperator, - value: PropertyFilterValue + value: PropertyFilterValue, } impl NodeDegreeFilterOp { pub(crate) fn new(graph: G, filter: DegreeFilter) -> Self { let degree = Degree { dir: filter.direction, - view: graph + view: graph, }; Self { 
degree, operator: filter.operator, - value: filter.value + value: filter.value, } } } @@ -248,7 +255,8 @@ impl NodeOp for NodeDegreeFilterOp { fn apply(&self, storage: &GraphStorage, node: VID) -> Self::Output { let node_degree = self.degree.apply(storage, node); let node_degree_prop = Prop::U64(node_degree as u64); - self.operator.apply_to_property(&self.value, Some(&node_degree_prop)) + self.operator + .apply_to_property(&self.value, Some(&node_degree_prop)) } } diff --git a/raphtory/src/db/graph/views/filter/model/degree_filter.rs b/raphtory/src/db/graph/views/filter/model/degree_filter.rs index 18e5f3d463..ef6bc5c56f 100644 --- a/raphtory/src/db/graph/views/filter/model/degree_filter.rs +++ b/raphtory/src/db/graph/views/filter/model/degree_filter.rs @@ -1,24 +1,35 @@ -use std::collections::HashSet; -use std::sync::Arc; - -use raphtory_api::core::entities::properties::prop::PropType; -use raphtory_api::core::{Direction, entities::properties::prop::Prop}; -use raphtory_core::entities::{VID}; +use std::{collections::HashSet, sync::Arc}; + +use crate::{ + db::{ + api::{ + state::ops::{filter::NodeDegreeFilterOp, GraphView}, + view::{GraphViewOps, NodeViewOps}, + }, + graph::views::filter::{ + model, + model::{ + property_filter::{ + builders::{PropertyExprBuilder, PropertyExprBuilderInput}, + Op, PropertyFilter, PropertyFilterInput, PropertyFilterValue, PropertyRef, + }, + CombinedFilter, ComposableFilter, CompositeNodeFilter, EntityMarker, + FilterOperator, InternalPropertyFilterBuilder, NodeFilter, TryAsCompositeFilter, + }, + node_filtered_graph::NodeFilteredGraph, + CreateFilter, + }, + }, + errors::GraphError, +}; +use raphtory_api::core::{ + entities::properties::prop::{Prop, PropType}, + Direction, +}; +use raphtory_core::entities::VID; use raphtory_storage::graph::nodes::{node_ref::NodeStorageRef, node_storage_ops::NodeStorageOps}; -use crate::db::api::state::ops::GraphView; -use crate::db::api::state::ops::filter::NodeDegreeFilterOp; -use 
crate::db::graph::views::filter::CreateFilter; -use crate::db::graph::views::filter::model::{ComposableFilter, CompositeNodeFilter, NodeFilter}; -use crate::db::graph::views::filter::model::property_filter::{Op, PropertyFilterInput, PropertyRef, PropertyFilter}; -use crate::db::graph::views::filter::model::property_filter::builders::{PropertyExprBuilder, PropertyExprBuilderInput}; -use crate::db::graph::views::filter::model::{CombinedFilter, EntityMarker, InternalPropertyFilterBuilder, TryAsCompositeFilter}; -use crate::db::graph::views::filter::model; -use crate::db::graph::views::filter::node_filtered_graph::NodeFilteredGraph; -use crate::db::{api::view::{GraphViewOps, NodeViewOps}, graph::views::filter::model::{FilterOperator, property_filter::PropertyFilterValue}}; -use crate::errors::GraphError; use std::{fmt, fmt::Display}; - #[derive(Clone)] pub struct DegreeFilterBuilder { direction: Direction, @@ -42,7 +53,6 @@ pub struct DegreeFilter { pub ops: Vec, } - impl CreateFilter for DegreeFilter { type EntityFiltered<'graph, G: GraphViewOps<'graph>> = NodeFilteredGraph>; @@ -73,22 +83,30 @@ impl CreateFilter for DegreeFilter { )); } match self.operator { - FilterOperator::Eq | FilterOperator::Ne| FilterOperator::Gt | FilterOperator::Ge | FilterOperator::Lt | FilterOperator::Le | FilterOperator::IsIn | FilterOperator::IsNotIn => {}, + FilterOperator::Eq + | FilterOperator::Ne + | FilterOperator::Gt + | FilterOperator::Ge + | FilterOperator::Lt + | FilterOperator::Le + | FilterOperator::IsIn + | FilterOperator::IsNotIn => {} _ => { - return Err(GraphError::InvalidFilter( - format!("degree filter does not support operator {:?}", self.operator) - )); + return Err(GraphError::InvalidFilter(format!( + "degree filter does not support operator {:?}", + self.operator + ))); } } let value = match self.value { PropertyFilterValue::Single(ref prop_val) => { let casted_val = prop_val.clone().try_cast(PropType::U64).ok_or_else(|| { GraphError::InvalidFilter(format!( - "degree 
filter expects an integer value, got {}", + "degree filter expects an integer value, got {}", prop_val.to_string() )) })?; - + PropertyFilterValue::Single(casted_val) } PropertyFilterValue::Set(ref prop_vals) => { @@ -97,7 +115,7 @@ impl CreateFilter for DegreeFilter { .map(|val| { val.clone().try_cast(PropType::U64).ok_or_else(|| { GraphError::InvalidFilter(format!( - "degree filter expects an integer value, got {}", + "degree filter expects an integer value, got {}", val.to_string() )) }) @@ -108,11 +126,11 @@ impl CreateFilter for DegreeFilter { } PropertyFilterValue::None => { return Err(GraphError::InvalidFilter( - "degree filter requires a value".to_string() + "degree filter requires a value".to_string(), )); } - }; - let mut filter = self.clone(); + }; + let mut filter = self.clone(); filter.value = value; Ok(NodeDegreeFilterOp::new(graph, filter)) } @@ -126,19 +144,20 @@ impl CreateFilter for DegreeFilter { } impl TryAsCompositeFilter for DegreeFilter { - fn try_as_composite_edge_filter(&self) -> Result { - Err(GraphError::NotSupported) + fn try_as_composite_edge_filter( + &self, + ) -> Result { + Err(GraphError::NotSupported) } fn try_as_composite_exploded_edge_filter( &self, - ) -> Result - { - Err(GraphError::NotSupported) - } + ) -> Result { + Err(GraphError::NotSupported) + } fn try_as_composite_node_filter(&self) -> Result { Ok(CompositeNodeFilter::Degree(self.clone())) } -} +} fn property_ref(direction: &Direction) -> PropertyRef { match direction { @@ -150,7 +169,7 @@ fn property_ref(direction: &Direction) -> PropertyRef { impl InternalPropertyFilterBuilder for DegreeFilterBuilder where - DegreeFilter: CombinedFilter + DegreeFilter: CombinedFilter, { type Filter = DegreeFilter; type ExprBuilder = DegreeFilterBuilder; @@ -170,10 +189,10 @@ where fn filter(&self, filter: PropertyFilterInput) -> Self::Filter { DegreeFilter { - value: filter.prop_value, - direction: self.direction, - operator: filter.operator, - ops: filter.ops, + value: 
filter.prop_value, + direction: self.direction, + operator: filter.operator, + ops: filter.ops, } } @@ -187,7 +206,7 @@ where impl ComposableFilter for DegreeFilter {} pub trait DegreeFilterFactory { - fn in_degree(&self) -> DegreeFilterBuilder; + fn in_degree(&self) -> DegreeFilterBuilder; fn out_degree(&self) -> DegreeFilterBuilder; fn degree(&self) -> DegreeFilterBuilder; } @@ -203,4 +222,4 @@ impl Display for DegreeFilter { }; property_filter.fmt(f) } -} +} diff --git a/raphtory/src/db/graph/views/filter/model/mod.rs b/raphtory/src/db/graph/views/filter/model/mod.rs index ef20e4609e..b95369693f 100644 --- a/raphtory/src/db/graph/views/filter/model/mod.rs +++ b/raphtory/src/db/graph/views/filter/model/mod.rs @@ -56,6 +56,7 @@ use raphtory_api::core::{ use std::{ops::Deref, sync::Arc}; pub mod and_filter; +pub mod degree_filter; pub mod edge_filter; pub mod exploded_edge_filter; pub mod filter; @@ -75,8 +76,6 @@ pub mod or_filter; pub mod property_filter; pub mod snapshot_filter; pub mod windowed_filter; -pub mod degree_filter; - #[derive(Debug, Copy, Clone)] pub struct NoFilter; diff --git a/raphtory/src/db/graph/views/filter/model/node_filter/mod.rs b/raphtory/src/db/graph/views/filter/model/node_filter/mod.rs index 4dd4cc38fd..85bf52f82e 100644 --- a/raphtory/src/db/graph/views/filter/model/node_filter/mod.rs +++ b/raphtory/src/db/graph/views/filter/model/node_filter/mod.rs @@ -1,31 +1,47 @@ use crate::{ + api::core::Direction, db::{ api::{ state::{ - NodeStateValue, TypedNodeState, ops::{ - NodeOp, TypeId, filter::{ + ops::{ + filter::{ AndOp, MaskOp, NodeIdFilterOp, NodeNameFilterOp, NodeTypeFilterOp, NotOp, OrOp, - } - } + }, + NodeOp, TypeId, + }, + NodeStateValue, TypedNodeState, }, - view::{BoxableGraphView, internal::GraphView}, + view::{internal::GraphView, BoxableGraphView}, }, graph::views::filter::{ - CreateFilter, model::{ - AndFilter, CombinedFilter, ComposableFilter, CompositeExplodedEdgeFilter, EntityMarker, InternalPropertyFilterFactory, 
InternalViewWrapOps, NodeViewFilterOps, NotFilter, OrFilter, TryAsCompositeFilter, Wrap, degree_filter::{DegreeFilter, DegreeFilterBuilder}, edge_filter::CompositeEdgeFilter, filter::Filter, is_active_node_filter::IsActiveNode, latest_filter::Latest, layered_filter::Layered, node_filter::{ + model::{ + degree_filter::{DegreeFilter, DegreeFilterBuilder, DegreeFilterFactory}, + edge_filter::CompositeEdgeFilter, + filter::Filter, + is_active_node_filter::IsActiveNode, + latest_filter::Latest, + layered_filter::Layered, + node_filter::{ builders::{NodeIdFilterBuilder, NodeNameFilterBuilder, NodeTypeFilterBuilder}, validate::validate, - }, node_state_filter::NodeStateBoolColOp, property_filter::builders::{MetadataFilterBuilder, PropertyFilterBuilder}, snapshot_filter::{SnapshotAt, SnapshotLatest}, windowed_filter::Windowed - }, node_filtered_graph::NodeFilteredGraph + }, + node_state_filter::NodeStateBoolColOp, + property_filter::builders::{MetadataFilterBuilder, PropertyFilterBuilder}, + snapshot_filter::{SnapshotAt, SnapshotLatest}, + windowed_filter::Windowed, + AndFilter, CombinedFilter, ComposableFilter, CompositeExplodedEdgeFilter, + EntityMarker, InternalPropertyFilterFactory, InternalViewWrapOps, + NodeViewFilterOps, NotFilter, OrFilter, TryAsCompositeFilter, Wrap, + }, + node_filtered_graph::NodeFilteredGraph, + CreateFilter, }, }, errors::GraphError, prelude::{GraphViewOps, PropertyFilter}, }; use raphtory_api::core::storage::timeindex::EventTime; -use crate::api::core::Direction; -use crate::db::graph::views::filter::model::degree_filter::DegreeFilterFactory; use std::{fmt, fmt::Display, sync::Arc}; pub mod builders; @@ -106,19 +122,18 @@ impl InternalPropertyFilterFactory for NodeFilter { impl DegreeFilterFactory for NodeFilter { fn degree(&self) -> DegreeFilterBuilder { - DegreeFilterBuilder::new(Direction::BOTH) + DegreeFilterBuilder::new(Direction::BOTH) } fn in_degree(&self) -> DegreeFilterBuilder { - DegreeFilterBuilder::new(Direction::IN) + 
DegreeFilterBuilder::new(Direction::IN) } fn out_degree(&self) -> DegreeFilterBuilder { - DegreeFilterBuilder::new(Direction::OUT) + DegreeFilterBuilder::new(Direction::OUT) } } - impl NodeViewFilterOps for NodeFilter { type Output = T; diff --git a/raphtory/src/python/filter/node_filter_builders.rs b/raphtory/src/python/filter/node_filter_builders.rs index 84317343c8..1fd4147f11 100644 --- a/raphtory/src/python/filter/node_filter_builders.rs +++ b/raphtory/src/python/filter/node_filter_builders.rs @@ -1,8 +1,14 @@ use crate::{ db::graph::views::filter::model::{ - NodeViewFilterOps, PropertyFilterFactory, ViewWrapOps, degree_filter::DegreeFilterFactory, node_filter::{ - NodeFilter, builders::{NodeIdFilterBuilder, NodeNameFilterBuilder, NodeTypeFilterBuilder}, ops::{NodeFilterOps, NodeIdFilterOps} - }, node_state_filter::NodeStateBoolColOp, property_filter::builders::{MetadataFilterBuilder, PropertyFilterBuilder} + degree_filter::DegreeFilterFactory, + node_filter::{ + builders::{NodeIdFilterBuilder, NodeNameFilterBuilder, NodeTypeFilterBuilder}, + ops::{NodeFilterOps, NodeIdFilterOps}, + NodeFilter, + }, + node_state_filter::NodeStateBoolColOp, + property_filter::builders::{MetadataFilterBuilder, PropertyFilterBuilder}, + NodeViewFilterOps, PropertyFilterFactory, ViewWrapOps, }, python::{ filter::{ From 526fbf0e917470fb21f826b1c8015cf74455b81c Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 15 Jun 2026 10:37:11 -0400 Subject: [PATCH 08/20] Move GraphMetadata and Metadata to raphtory-api crate. 
Still need to build the GraphMetadata in Raphtory --- raphtory-api/src/core/storage/graph_folder.rs | 33 +++++++++++++++++++ .../src/model/graph/meta_graph.rs | 3 +- raphtory-graphql/src/paths.rs | 7 ++-- raphtory/Cargo.toml | 2 +- raphtory/src/serialise/graph_folder.rs | 24 ++++---------- raphtory/src/serialise/metadata.rs | 30 +++++------------ raphtory/src/serialise/serialise.rs | 8 ++--- 7 files changed, 57 insertions(+), 50 deletions(-) diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index fd13d76d74..703beaefad 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -1,5 +1,12 @@ //! const vars for file and directory names regarding exported graphs. +use crate::GraphType; +use serde::{Deserialize, Serialize}; +use std::{ + fs::{self, File}, + path::Path, +}; + /// Metadata file that stores path to the data folder. pub const ROOT_META_PATH: &str = ".raph"; /// Outer most directory containing all data. @@ -16,3 +23,29 @@ pub const INDEX_PATH: &str = "index"; pub const VECTORS_PATH: &str = "vectors"; /// Temporary metadata file for atomic replacement. 
pub const DIRTY_PATH: &str = ".dirty"; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + pub path: String, + pub meta: GraphMetadata, +} + +#[derive(PartialEq, Serialize, Deserialize, Debug)] +pub struct GraphMetadata { + pub node_count: usize, + pub edge_count: usize, + pub graph_type: GraphType, + pub is_diskgraph: bool, +} + +#[cfg(feature = "io")] +impl Metadata { + /// Atomically write this metadata into the data folder at `data_path` + pub fn write_atomic(&self, data_path: &Path) -> std::io::Result<()> { + let tmp_path = data_path.join(".tmp"); + let tmp_file = File::create(&tmp_path)?; + serde_json::to_writer(tmp_file, self).map_err(std::io::Error::other)?; + fs::rename(tmp_path, data_path.join(GRAPH_META_PATH))?; + Ok(()) + } +} diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index a4a74fd846..26c54db647 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -9,8 +9,9 @@ use raphtory::{ db::api::storage::storage::read_constant_graph_properties, errors::GraphError, prelude::{GraphViewOps, PropertiesOps}, - serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata, GraphPaths}, + serialise::{parquet::decode_graph_metadata, GraphPaths}, }; +use raphtory_api::core::storage::graph_folder::GraphMetadata; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index ecab36e81c..6275c8a403 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -7,12 +7,9 @@ use raphtory::{ }, errors::{GraphError, InvalidPathReason}, prelude::{AdditionOps, GraphViewOps}, - serialise::{ - metadata::GraphMetadata, GraphFolder, GraphPaths, RelativePath, StableDecode, - WriteableGraphFolder, - }, + serialise::{GraphFolder, GraphPaths, RelativePath, StableDecode, WriteableGraphFolder}, }; -use 
raphtory_api::core::storage::graph_folder::{DIRTY_PATH, ROOT_META_PATH}; +use raphtory_api::core::storage::graph_folder::{GraphMetadata, DIRTY_PATH, ROOT_META_PATH}; use std::{ cmp::Ordering, fs, diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index c804546e00..78bbb3758e 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -14,7 +14,7 @@ homepage.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -raphtory-api.workspace = true +raphtory-api = { workspace = true, features = ["io"] } raphtory-core.workspace = true raphtory-storage.workspace = true raphtory-itertools.workspace = true diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 58b2353972..718c662158 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -12,14 +12,14 @@ use crate::{ db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, - serialise::metadata::GraphMetadata, + serialise::metadata::build_graph_metadata, }; use itertools::Itertools; use raphtory_api::core::{ input::input_node::parse_u64_strict, storage::graph_folder::{ - DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, GRAPH_PATH, INDEX_PATH, ROOT_META_PATH, - VECTORS_PATH, + GraphMetadata, Metadata, DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, GRAPH_PATH, INDEX_PATH, + ROOT_META_PATH, VECTORS_PATH, }, }; use serde::{Deserialize, Serialize}; @@ -128,12 +128,6 @@ pub struct RelativePath { pub path: String, } -#[derive(Debug, Serialize, Deserialize)] -pub struct Metadata { - pub path: String, - pub meta: GraphMetadata, -} - pub trait GraphPaths { fn root(&self) -> &Path; @@ -223,16 +217,12 @@ pub trait GraphPaths { fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { let graph_path = self.relative_graph_path()?; - let metadata = GraphMetadata::from_graph(graph); + let metadata = build_graph_metadata(graph); let meta = Metadata { 
path: graph_path, meta: metadata, }; - let tmp_path = self.data_path()?.path.join(".tmp"); - let tmp_file = File::create(&tmp_path)?; - serde_json::to_writer(tmp_file, &meta)?; - let path = self.meta_path()?; - fs::rename(tmp_path, path)?; + meta.write_atomic(self.data_path()?.as_ref())?; Ok(()) } @@ -518,7 +508,7 @@ impl AsRef for InnerGraphFolder { impl InnerGraphFolder { pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { let graph_path = self.relative_graph_path()?; - let metadata = GraphMetadata::from_graph(graph); + let metadata = build_graph_metadata(graph); let meta = Metadata { path: graph_path, meta: metadata, @@ -543,7 +533,7 @@ impl InnerGraphFolder { let data_path = self.as_ref(); let old_relative_graph_path = self.relative_graph_path()?; let old_graph_path = self.path.join(&old_relative_graph_path); - let meta = GraphMetadata::from_graph(&graph); + let meta = build_graph_metadata(&graph); let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; graph.encode_parquet(data_path.join(&new_relative_graph_path))?; diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 67cbae6375..5a5ada975f 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -3,29 +3,15 @@ use crate::{ prelude::GraphViewOps, serialise::{GraphFolder, GraphPaths}, }; -use raphtory_api::GraphType; -use serde::{Deserialize, Serialize}; +use raphtory_api::core::storage::graph_folder::GraphMetadata; -#[derive(PartialEq, Serialize, Deserialize, Debug)] -pub struct GraphMetadata { - pub node_count: usize, - pub edge_count: usize, - pub graph_type: GraphType, - pub is_diskgraph: bool, -} - -impl GraphMetadata { - pub fn from_graph(graph: G) -> Self { - let node_count = graph.count_nodes(); - let edge_count = graph.count_edges(); - let graph_type = graph.graph_type(); - let is_diskgraph = graph.disk_storage_path().is_some(); - Self { - node_count, - edge_count, - 
graph_type, - is_diskgraph, - } +/// Build the [`GraphMetadata`] summary for a graph +pub fn build_graph_metadata(graph: impl GraphView) -> GraphMetadata { + GraphMetadata { + node_count: graph.count_nodes(), + edge_count: graph.count_edges(), + graph_type: graph.graph_type(), + is_diskgraph: graph.disk_storage_path().is_some(), } } diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 6438c9d706..d7a59976e7 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -7,13 +7,13 @@ use crate::{ errors::GraphError, serialise::{ get_zip_graph_path, - metadata::GraphMetadata, + metadata::build_graph_metadata, parquet::{ParquetDecoder, ParquetEncoder}, - GraphFolder, GraphPaths, Metadata, RelativePath, + GraphFolder, GraphPaths, RelativePath, }, }; use raphtory_api::core::storage::graph_folder::{ - DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH, GRAPH_META_PATH, ROOT_META_PATH, + Metadata, DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH, GRAPH_META_PATH, ROOT_META_PATH, }; use std::{ fs::File, @@ -34,7 +34,7 @@ pub trait StableEncode: StaticGraphViewOps + AdditionOps { impl StableEncode for T { fn encode_to_zip(&self, mut writer: ZipWriter) -> Result<(), GraphError> { - let graph_meta = GraphMetadata::from_graph(self); + let graph_meta = build_graph_metadata(self); writer.start_file(ROOT_META_PATH, SimpleFileOptions::default())?; writer.write_all(&serde_json::to_vec(&RelativePath { path: DEFAULT_DATA_PATH.to_string(), From a911f24826bcacd83b39d2a810cf122e27328e13 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 16 Jun 2026 05:22:51 -0400 Subject: [PATCH 09/20] Update write_metadata on InnerGraphFolder to use the new write_atomic on Metadata. 
--- raphtory-api/src/core/storage/graph_folder.rs | 4 ++-- raphtory/src/serialise/graph_folder.rs | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index 703beaefad..3770512559 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -41,11 +41,11 @@ pub struct GraphMetadata { #[cfg(feature = "io")] impl Metadata { /// Atomically write this metadata into the data folder at `data_path` - pub fn write_atomic(&self, data_path: &Path) -> std::io::Result<()> { + pub fn write_atomic(&self, data_path: &Path, meta_path: &Path) -> std::io::Result<()> { let tmp_path = data_path.join(".tmp"); let tmp_file = File::create(&tmp_path)?; serde_json::to_writer(tmp_file, self).map_err(std::io::Error::other)?; - fs::rename(tmp_path, data_path.join(GRAPH_META_PATH))?; + fs::rename(tmp_path, meta_path)?; Ok(()) } } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 718c662158..35efaaf861 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -222,7 +222,7 @@ pub trait GraphPaths { path: graph_path, meta: metadata, }; - meta.write_atomic(self.data_path()?.as_ref())?; + meta.write_atomic(self.data_path()?.as_ref(), self.meta_path()?.as_ref())?; Ok(()) } @@ -513,9 +513,8 @@ impl InnerGraphFolder { path: graph_path, meta: metadata, }; - let path = self.meta_path(); - let file = File::create(&path)?; - Ok(serde_json::to_writer(file, &meta)?) 
+ meta.write_atomic(self.as_ref(), &self.meta_path())?; + Ok(()) } pub fn read_metadata(&self) -> Result { From ca3f9f85fa64777d8338da53d32c6b9e9fe7aa44 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 16 Jun 2026 06:05:41 -0400 Subject: [PATCH 10/20] Update Drop and flush on graphs to use InnerGraphFolder's write_metadata to update metadata instead of previous refresh_disk_graph_metadata. --- db4-storage/src/lib.rs | 9 -------- raphtory-api/src/core/storage/graph_folder.rs | 1 + raphtory/src/db/api/mutation/addition_ops.rs | 21 ++++++++++++------- raphtory/src/db/api/storage/storage.rs | 16 +++++++------- raphtory/src/serialise/graph_folder.rs | 4 ++++ 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 065262279a..746485e4e6 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -231,12 +231,3 @@ pub fn read_constant_graph_properties( > { Ok(Vec::new()) } - -/// No-op shim for when we have db4-storage instead of db4-disk-storage -pub fn refresh_disk_graph_metadata( - _disk_graph_path: &Path, - _node_count: usize, - _edge_count: usize, -) -> Result<(), error::StorageError> { - Ok(()) -} diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index 3770512559..98024de357 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -2,6 +2,7 @@ use crate::GraphType; use serde::{Deserialize, Serialize}; +#[cfg(feature = "io")] use std::{ fs::{self, File}, path::Path, diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index a5c510a37c..f98c184e20 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -8,6 +8,7 @@ use crate::{ graph::{edge::EdgeView, node::NodeView}, }, errors::{into_graph_err, GraphError}, + serialise::InnerGraphFolder, }; use raphtory_api::core::{ 
entities::properties::{ @@ -23,7 +24,10 @@ use raphtory_storage::{ MutationError, }, }; -use storage::wal::{GraphWalOps, WalOps}; +use storage::{ + error::StorageError, + wal::{GraphWalOps, WalOps}, +}; pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { // TODO: Probably add vector reference here like add @@ -309,13 +313,14 @@ impl> + StaticGraphViewOps> Addit #[cfg(feature = "io")] { if let Some(disk_path) = self.disk_storage_path() { - let disk_path = disk_path.to_path_buf(); - storage::refresh_disk_graph_metadata( - &disk_path, - self.count_nodes(), - self.count_edges(), - ) - .map_err(|err| MutationError::from(err).into())?; + if let Some(data_folder) = disk_path.parent() { + InnerGraphFolder::new(data_folder) + .write_metadata(self) + .map_err(|err| { + MutationError::from(StorageError::from(std::io::Error::from(err))) + .into() + })?; + } } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index bc5e8b7862..ed0a25318a 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -13,7 +13,7 @@ use raphtory_api::core::{ meta::Meta, prop::{AsPropRef, Prop, PropType}, }, - GidRef, LayerId, LayerIds, EID, VID, + GidRef, LayerId, EID, VID, }, storage::{dict_mapper::MaybeNew, timeindex::EventTime}, }; @@ -55,6 +55,7 @@ use { }; // Re-export for raphtory dependencies to use when creating graphs. +use crate::serialise::InnerGraphFolder; pub use storage::{ persist::strategy::PersistenceStrategy, read_constant_graph_properties, Config, Extension, }; @@ -70,13 +71,12 @@ pub struct Storage { impl Drop for Storage { fn drop(&mut self) { if let Some(disk_path) = self.graph.disk_storage_path() { - let disk_path = disk_path.to_path_buf(); - let node_count = self.graph.unfiltered_num_nodes(&LayerIds::All); - let edge_count = self.graph.unfiltered_num_edges(&LayerIds::All); - // Drop must not panic - ignore any error refreshing the metadata - // file. 
The graph data itself is already persisted by the storage - // layer so a stale `.meta` only affects node and edge counts (for now). - let _ = storage::refresh_disk_graph_metadata(&disk_path, node_count, edge_count); + if let Some(data_folder) = disk_path.parent() { + // Drop must not panic - ignore any error refreshing the metadata + // file. The graph data itself is already persisted by the storage + // layer so a stale `.meta` only affects node and edge counts (for now). + let _ = InnerGraphFolder::new(data_folder).write_metadata(&self.graph); + } } } } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 35efaaf861..046762a086 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -506,6 +506,10 @@ impl AsRef for InnerGraphFolder { } impl InnerGraphFolder { + pub fn new(path: impl Into) -> Self { + Self { path: path.into() } + } + pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { let graph_path = self.relative_graph_path()?; let metadata = build_graph_metadata(graph); From abe4b654c7f168054cdd063b6af89423781433cb Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 16 Jun 2026 10:35:26 -0400 Subject: [PATCH 11/20] Stop creating InnerGraphFolder, instead create Metadata and write it to disk --- raphtory/src/db/api/mutation/addition_ops.rs | 29 ++++++++++++-------- raphtory/src/db/api/storage/storage.rs | 16 +++++++++-- raphtory/src/serialise/graph_folder.rs | 4 --- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index f98c184e20..bbc04fac82 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "io")] +use crate::serialise::metadata::build_graph_metadata; use crate::{ core::entities::{edges::edge_ref::EdgeRef, nodes::node_ref::AsNodeRef}, db::{ @@ 
-8,8 +10,9 @@ use crate::{ graph::{edge::EdgeView, node::NodeView}, }, errors::{into_graph_err, GraphError}, - serialise::InnerGraphFolder, }; +#[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::{Metadata, GRAPH_META_PATH}; use raphtory_api::core::{ entities::properties::{ meta::{DEFAULT_NODE_TYPE_ID, STATIC_GRAPH_LAYER_ID}, @@ -24,10 +27,9 @@ use raphtory_storage::{ MutationError, }, }; -use storage::{ - error::StorageError, - wal::{GraphWalOps, WalOps}, -}; +#[cfg(feature = "io")] +use storage::error::StorageError; +use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { // TODO: Probably add vector reference here like add @@ -313,13 +315,16 @@ impl> + StaticGraphViewOps> Addit #[cfg(feature = "io")] { if let Some(disk_path) = self.disk_storage_path() { - if let Some(data_folder) = disk_path.parent() { - InnerGraphFolder::new(data_folder) - .write_metadata(self) - .map_err(|err| { - MutationError::from(StorageError::from(std::io::Error::from(err))) - .into() - })?; + if let (Some(data_folder), Some(graph_dir)) = ( + disk_path.parent(), + disk_path.file_name().and_then(|name| name.to_str()), + ) { + let meta = Metadata { + path: graph_dir.to_string(), + meta: build_graph_metadata(self), + }; + meta.write_atomic(data_folder, &data_folder.join(GRAPH_META_PATH)) + .map_err(|err| MutationError::from(StorageError::from(err)).into())?; } } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index ed0a25318a..ab97458716 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -55,7 +55,10 @@ use { }; // Re-export for raphtory dependencies to use when creating graphs. 
-use crate::serialise::InnerGraphFolder; +#[cfg(feature = "io")] +use crate::serialise::metadata::build_graph_metadata; +#[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::{Metadata, GRAPH_META_PATH}; pub use storage::{ persist::strategy::PersistenceStrategy, read_constant_graph_properties, Config, Extension, }; @@ -71,11 +74,18 @@ pub struct Storage { impl Drop for Storage { fn drop(&mut self) { if let Some(disk_path) = self.graph.disk_storage_path() { - if let Some(data_folder) = disk_path.parent() { + if let (Some(data_folder), Some(graph_dir)) = ( + disk_path.parent(), + disk_path.file_name().and_then(|name| name.to_str()), + ) { // Drop must not panic - ignore any error refreshing the metadata // file. The graph data itself is already persisted by the storage // layer so a stale `.meta` only affects node and edge counts (for now). - let _ = InnerGraphFolder::new(data_folder).write_metadata(&self.graph); + let meta = Metadata { + path: graph_dir.to_string(), + meta: build_graph_metadata(&self.graph), + }; + let _ = meta.write_atomic(data_folder, &data_folder.join(GRAPH_META_PATH)); } } } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 046762a086..35efaaf861 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -506,10 +506,6 @@ impl AsRef for InnerGraphFolder { } impl InnerGraphFolder { - pub fn new(path: impl Into) -> Self { - Self { path: path.into() } - } - pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { let graph_path = self.relative_graph_path()?; let metadata = build_graph_metadata(graph); From c7554f83fc2faa1375c624c9dca226bbd84a3119 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 22 Jun 2026 04:31:30 -0400 Subject: [PATCH 12/20] Remove Drop impl on Storage. remove test for it. 
--- raphtory/src/db/api/storage/storage.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index ab97458716..3d88833efa 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -55,10 +55,6 @@ use { }; // Re-export for raphtory dependencies to use when creating graphs. -#[cfg(feature = "io")] -use crate::serialise::metadata::build_graph_metadata; -#[cfg(feature = "io")] -use raphtory_api::core::storage::graph_folder::{Metadata, GRAPH_META_PATH}; pub use storage::{ persist::strategy::PersistenceStrategy, read_constant_graph_properties, Config, Extension, }; @@ -70,27 +66,6 @@ pub struct Storage { pub(crate) index: RwLock, } -#[cfg(feature = "io")] -impl Drop for Storage { - fn drop(&mut self) { - if let Some(disk_path) = self.graph.disk_storage_path() { - if let (Some(data_folder), Some(graph_dir)) = ( - disk_path.parent(), - disk_path.file_name().and_then(|name| name.to_str()), - ) { - // Drop must not panic - ignore any error refreshing the metadata - // file. The graph data itself is already persisted by the storage - // layer so a stale `.meta` only affects node and edge counts (for now). - let meta = Metadata { - path: graph_dir.to_string(), - meta: build_graph_metadata(&self.graph), - }; - let _ = meta.write_atomic(data_folder, &data_folder.join(GRAPH_META_PATH)); - } - } - } -} - impl From for Storage { fn from(graph: GraphStorage) -> Self { Self::from_inner(graph) From e1a633989c2b80aecc3ed1f19ce6d61c93b6e4b9 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 23 Jun 2026 04:44:21 -0400 Subject: [PATCH 13/20] Remove meta file update logic from AdditionOps::flush (in raphtory) to GraphStore::flush (in db4-storage). Gets the graph type from reading the .meta file. Checks to make sure the path hasn't changed. 
--- db4-storage/Cargo.toml | 2 +- db4-storage/src/pages/mod.rs | 56 +++++++++++++++++++- raphtory-api/src/lib.rs | 16 ++++++ raphtory/src/db/api/mutation/addition_ops.rs | 24 --------- 4 files changed, 72 insertions(+), 26 deletions(-) diff --git a/db4-storage/Cargo.toml b/db4-storage/Cargo.toml index 3cf61c6661..2882bf1bde 100644 --- a/db4-storage/Cargo.toml +++ b/db4-storage/Cargo.toml @@ -11,7 +11,7 @@ rust-version.workspace = true edition = "2024" [dependencies] -raphtory-api.workspace = true +raphtory-api = { workspace = true, features = ["io"] } raphtory-api-macros.workspace = true raphtory-core = { workspace = true } raphtory-itertools.workspace = true diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index c849ce8549..ca71aadd28 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -18,9 +18,14 @@ use graph_prop_store::GraphPropStorageInner; use node_page::writer::NodeWriter; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; -use raphtory_api::core::entities::properties::meta::Meta; +use raphtory_api::core::{ + entities::properties::meta::Meta, + storage::graph_folder::{GRAPH_META_PATH, GraphMetadata, Metadata}, +}; use rayon::prelude::*; use std::{ + fs::File, + io::ErrorKind, path::{Path, PathBuf}, sync::{ Arc, @@ -91,6 +96,55 @@ impl< self.edges.flush()?; self.graph_props.flush()?; + self.refresh_metadata()?; + + Ok(()) + } + + /// Refresh the graph metadata file (`.meta`) for disk-backed graphs. Reads graph type from the existing meta file. 
+ /// Errors if the file can't be read or the graph path has changed + fn refresh_metadata(&self) -> Result<(), StorageError> { + let Some(graph_dir) = self.graph_dir.as_ref() else { + return Ok(()); + }; + let (Some(data_folder), Some(graph_path)) = ( + graph_dir.parent(), + graph_dir.file_name().and_then(|name| name.to_str()), + ) else { + return Ok(()); + }; + + // if the file doesn't exist, there is nothing to refresh + let meta_path = data_folder.join(GRAPH_META_PATH); + let file = match File::open(&meta_path) { + Ok(file) => file, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(()), + Err(err) => return Err(err.into()), + }; + + // a corrupted file returns an error + let existing: Metadata = serde_json::from_reader(file)?; + + // the graph data directory must not change between writes + if existing.path != graph_path { + return Err(StorageError::GenericFailure(format!( + "graph path in {} changed from {:?} to {:?}", + meta_path.display(), + existing.path, + graph_path, + ))); + } + + let metadata = Metadata { + path: graph_path.to_string(), + meta: GraphMetadata { + node_count: self.nodes.num_nodes(), + edge_count: self.edges.num_edges(), + graph_type: existing.meta.graph_type, + is_diskgraph: true, + }, + }; + metadata.write_atomic(data_folder, &meta_path)?; Ok(()) } } diff --git a/raphtory-api/src/lib.rs b/raphtory-api/src/lib.rs index 8861afe5fe..f4ba22740b 100644 --- a/raphtory-api/src/lib.rs +++ b/raphtory-api/src/lib.rs @@ -14,3 +14,19 @@ pub enum GraphType { EventGraph, PersistentGraph, } + +impl GraphType { + pub fn is_event_graph(&self) -> bool { + match self { + GraphType::EventGraph => true, + _ => false, + } + } + + pub fn is_persistent_graph(&self) -> bool { + match self { + GraphType::PersistentGraph => true, + _ => false, + } + } +} diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index bbc04fac82..539d444ef8 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ 
b/raphtory/src/db/api/mutation/addition_ops.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "io")] -use crate::serialise::metadata::build_graph_metadata; use crate::{ core::entities::{edges::edge_ref::EdgeRef, nodes::node_ref::AsNodeRef}, db::{ @@ -11,8 +9,6 @@ use crate::{ }, errors::{into_graph_err, GraphError}, }; -#[cfg(feature = "io")] -use raphtory_api::core::storage::graph_folder::{Metadata, GRAPH_META_PATH}; use raphtory_api::core::{ entities::properties::{ meta::{DEFAULT_NODE_TYPE_ID, STATIC_GRAPH_LAYER_ID}, @@ -27,8 +23,6 @@ use raphtory_storage::{ MutationError, }, }; -#[cfg(feature = "io")] -use storage::error::StorageError; use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { @@ -311,24 +305,6 @@ impl> + StaticGraphViewOps> Addit self.core_graph() .flush() .map_err(|err| MutationError::from(err).into())?; - - #[cfg(feature = "io")] - { - if let Some(disk_path) = self.disk_storage_path() { - if let (Some(data_folder), Some(graph_dir)) = ( - disk_path.parent(), - disk_path.file_name().and_then(|name| name.to_str()), - ) { - let meta = Metadata { - path: graph_dir.to_string(), - meta: build_graph_metadata(self), - }; - meta.write_atomic(data_folder, &data_folder.join(GRAPH_META_PATH)) - .map_err(|err| MutationError::from(StorageError::from(err)).into())?; - } - } - } - Ok(()) } } From 6f20a9809e6edf2417156067142e13f73099b701 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 25 Jun 2026 05:53:54 -0400 Subject: [PATCH 14/20] Preparing for move of GraphPaths over to raphtory-api. Add zip behind "io" feature in raphtory-api. Update write_metadata to take a Metadata (not GraphView) bc GraphView isn't available in raphtory-api. Update callsites to construct the Metadata. Add an error type in raphtory-api to avoid using GraphError. 
--- Cargo.lock | 1 + raphtory-api/Cargo.toml | 3 +- raphtory-api/src/core/storage/graph_folder.rs | 22 ++++- raphtory-graphql/src/paths.rs | 27 +++++-- raphtory-tests/tests/graph_index.rs | 4 +- raphtory/src/db/api/view/graph.rs | 10 ++- raphtory/src/db/graph/graph.rs | 16 +++- raphtory/src/db/graph/views/deletion_graph.rs | 16 +++- raphtory/src/errors.rs | 17 +--- raphtory/src/serialise/graph_folder.rs | 81 ++++++++++--------- raphtory/src/serialise/serialise.rs | 12 ++- 11 files changed, 139 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b518abf163..76e1f9dc3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6361,6 +6361,7 @@ dependencies = [ "tracing", "tracing-subscriber", "twox-hash", + "zip", ] [[package]] diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 14818af8fa..68cea8778e 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -17,6 +17,7 @@ edition.workspace = true [dependencies] serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, optional = true } +zip = { workspace = true, optional = true } bigdecimal = { workspace = true, features = ["string-only"] } thiserror = { workspace = true } bytemuck = { workspace = true } @@ -59,4 +60,4 @@ python = [ vectors = [] template = ["dep:minijinja"] search = [] -io = ["dep:serde_json"] +io = ["dep:serde_json", "dep:zip"] diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index 98024de357..ce0df28bbd 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "io")] use std::{ fs::{self, File}, - path::Path, + path::{Path, PathBuf}, }; /// Metadata file that stores path to the data folder. 
@@ -50,3 +50,23 @@ impl Metadata { Ok(()) } } + +/// Errors returned by the graph folder path operations (the `GraphPaths` trait and its helpers) +#[cfg(feature = "io")] +#[derive(thiserror::Error, Debug)] +pub enum GraphFolderError { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Serde(#[from] serde_json::Error), + #[error("zip operation failed: {0}")] + Zip(#[from] zip::result::ZipError), + #[error("Path {0} is not a valid relative data path")] + InvalidRelativePath(String), + #[error("Not a zip archive")] + NotAZip, + #[error("Cannot write graph into non empty folder {0}")] + NonEmptyGraphFolder(PathBuf), + #[error("Graph folder is not initialised for writing")] + NoWriteInProgress, +} diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 6275c8a403..411a60ef5d 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -7,9 +7,14 @@ use raphtory::{ }, errors::{GraphError, InvalidPathReason}, prelude::{AdditionOps, GraphViewOps}, - serialise::{GraphFolder, GraphPaths, RelativePath, StableDecode, WriteableGraphFolder}, + serialise::{ + metadata::build_graph_metadata, GraphFolder, GraphPaths, RelativePath, StableDecode, + WriteableGraphFolder, + }, +}; +use raphtory_api::core::storage::graph_folder::{ + GraphFolderError, GraphMetadata, Metadata, DIRTY_PATH, ROOT_META_PATH, }; -use raphtory_api::core::storage::graph_folder::{GraphMetadata, DIRTY_PATH, ROOT_META_PATH}; use std::{ cmp::Ordering, fs, @@ -159,7 +164,11 @@ impl ExistingGraphFolder { if path != self.global_path.graph_path()? 
{ return Err(InternalPathValidationError::MismatchedGraphPath); } - self.global_path.write_metadata(&graph)?; + let meta = Metadata { + path: self.global_path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + self.global_path.write_metadata(meta)?; } else { self.global_path.data_path()?.replace_graph(graph)?; } @@ -382,7 +391,11 @@ impl ValidWriteableGraphFolder { .disk_storage_path() .is_some_and(|path| path == &graph_path) { - self.global_path.write_metadata(&graph)?; + let meta = Metadata { + path: self.global_path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + self.global_path.write_metadata(meta)?; (true, graph) } else { let new_graph = graph.materialize_at_with_config(self.graph_folder(), config)?; @@ -460,6 +473,8 @@ pub enum InternalPathValidationError { InvalidMetadata(#[from] serde_json::Error), #[error(transparent)] GraphError(#[from] GraphError), + #[error(transparent)] + GraphFolderError(#[from] GraphFolderError), #[error("Graph path should always have a parent")] MissingParent, #[error(transparent)] @@ -618,11 +633,11 @@ impl GraphPaths for ValidGraphFolder { self.global_path.root() } - fn relative_data_path(&self) -> Result { + fn relative_data_path(&self) -> Result { self.global_path.relative_data_path() } - fn relative_graph_path(&self) -> Result { + fn relative_graph_path(&self) -> Result { self.global_path.relative_graph_path() } } diff --git a/raphtory-tests/tests/graph_index.rs b/raphtory-tests/tests/graph_index.rs index 7dd254d1f4..e2ac5fad40 100644 --- a/raphtory-tests/tests/graph_index.rs +++ b/raphtory-tests/tests/graph_index.rs @@ -226,7 +226,9 @@ mod test_index { let result = graph.encode(path); match result { - Err(GraphError::NonEmptyGraphFolder(err_path)) => { + Err(GraphError::GraphFolder(err)) + if matches!(err, GraphFolderError::NonEmptyGraphFolder(err_path)) => + { assert_eq!(path, err_path); } Ok(_) => panic!("Expected error on second encode, got Ok"), diff --git 
a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index d21868f439..12f435ae01 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -1,5 +1,5 @@ #[cfg(feature = "io")] -use crate::serialise::GraphPaths; +use crate::serialise::{metadata::build_graph_metadata, GraphPaths}; use crate::{ arrow_loader::{ dataframe::{DFChunk, DFView}, @@ -43,6 +43,8 @@ use ahash::HashSet; use arrow::array::RecordBatch; use db4_graph::TemporalGraph; use either::Either; +#[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::Metadata as GraphFolderMetadata; use raphtory_api::core::{ entities::properties::meta::{Meta, PropMapper}, storage::{arc_str::ArcStr, timeindex::EventTime}, @@ -681,7 +683,11 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { path.init()?; let graph_path = path.graph_path()?; let graph = materialize_impl(self, Some(graph_path.as_ref()), config)?; - path.write_metadata(&graph)?; + let meta = GraphFolderMetadata { + path: path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + path.write_metadata(meta)?; Ok(graph) } else { Err(GraphError::DiskGraphNotEnabled) diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 67d93b8b6d..cf1b5bac75 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -17,7 +17,7 @@ //! 
use super::views::deletion_graph::PersistentGraph; #[cfg(feature = "io")] -use crate::serialise::GraphPaths; +use crate::serialise::{metadata::build_graph_metadata, GraphPaths}; use crate::{ db::{ api::{ @@ -36,6 +36,8 @@ use crate::{ errors::GraphError, prelude::*, }; +#[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::Metadata as GraphFolderMetadata; use raphtory_api::{ core::storage::{arc_str::ArcStr, timeindex::AsTime}, inherit::Base, @@ -177,7 +179,11 @@ impl Graph { inner: Arc::new(storage), }; - path.write_metadata(&graph)?; + let meta = GraphFolderMetadata { + path: path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + path.write_metadata(meta)?; Ok(graph) } @@ -199,7 +205,11 @@ impl Graph { )?), }; - path.write_metadata(&graph)?; + let meta = GraphFolderMetadata { + path: path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + path.write_metadata(meta)?; Ok(graph) } diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index c47cd37ff4..19bcbc1fbe 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -1,5 +1,5 @@ #[cfg(feature = "io")] -use crate::serialise::GraphPaths; +use crate::serialise::{metadata::build_graph_metadata, GraphPaths}; use crate::{ core::storage::timeindex::{AsTime, EventTime, TimeIndex, TimeIndexOps}, db::{ @@ -15,6 +15,8 @@ use crate::{ errors::GraphError, prelude::*, }; +#[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::Metadata; use raphtory_api::{ core::entities::properties::tprop::TPropOps, inherit::Base, @@ -141,7 +143,11 @@ impl PersistentGraph { path.graph_path()?, config, )?)); - path.write_metadata(&graph)?; + let meta = Metadata { + path: path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + path.write_metadata(meta)?; Ok(graph) } @@ -163,7 +169,11 @@ impl PersistentGraph { } path.init()?; let graph = 
Self(Arc::new(Storage::new_at_path(path.graph_path()?)?)); - path.write_metadata(&graph)?; + let meta = Metadata { + path: path.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + path.write_metadata(meta)?; Ok(graph) } diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index e75edf6899..d6355a3d59 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -31,7 +31,7 @@ use raphtory_api::core::utils::time::ParseTimeError; #[cfg(feature = "search")] use {tantivy, tantivy::query::QueryParserError}; -use raphtory_api::core::storage::timeindex::TimeError; +use raphtory_api::core::storage::{graph_folder::GraphFolderError, timeindex::TimeError}; use storage::{error::StorageError, resolver::mapping_resolver::InvalidNodeId}; #[cfg(feature = "io")] use zip::result::ZipError; @@ -261,15 +261,9 @@ pub enum GraphError { location: &'static Location<'static>, }, - #[error("Not a zip archive")] - NotAZip, - #[error("Not a disk graph")] NotADiskGraph, - #[error("Graph folder is not initialised for writing")] - NoWriteInProgress, - #[error("Failed to load graph: {0}")] LoadFailure(String), @@ -323,10 +317,6 @@ pub enum GraphError { #[error("Illegal set error {0}")] IllegalSet(String), - #[cfg(feature = "io")] - #[error("Cannot write graph into non empty folder {0}")] - NonEmptyGraphFolder(PathBuf), - #[error("Immutable graph is .. 
immutable!")] AttemptToMutateImmutableGraph, @@ -438,14 +428,15 @@ pub enum GraphError { source: StripPrefixError, location: &'static Location<'static>, }, - #[error("Path {0} is not a valid relative data path")] - InvalidRelativePath(String), #[error(transparent)] StorageError(#[from] StorageError), #[error("Fatal write error: {0}")] FatalWriteError(StorageError), + + #[error(transparent)] + GraphFolder(#[from] GraphFolderError), } impl From for GraphError { diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 35efaaf861..1dad909553 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -18,8 +18,8 @@ use itertools::Itertools; use raphtory_api::core::{ input::input_node::parse_u64_strict, storage::graph_folder::{ - GraphMetadata, Metadata, DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, GRAPH_PATH, INDEX_PATH, - ROOT_META_PATH, VECTORS_PATH, + GraphFolderError, GraphMetadata, Metadata, DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, + GRAPH_PATH, INDEX_PATH, ROOT_META_PATH, VECTORS_PATH, }, }; use serde::{Deserialize, Serialize}; @@ -31,15 +31,18 @@ use std::{ use walkdir::WalkDir; use zip::{write::FileOptions, ZipArchive, ZipWriter}; -pub(crate) fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { +pub(crate) fn valid_path_pointer( + relative_path: &str, + prefix: &str, +) -> Result<(), GraphFolderError> { relative_path .strip_prefix(prefix) // should have the prefix .and_then(parse_u64_strict) // the remainder should be the id - .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; + .ok_or_else(|| GraphFolderError::InvalidRelativePath(relative_path.to_string()))?; Ok(()) } -fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { +fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { let mut value = String::new(); file.read_to_string(&mut value)?; let path: RelativePath = serde_json::from_str(&value)?; @@ 
-51,7 +54,7 @@ pub fn read_path_pointer( base_path: &Path, file_name: &str, prefix: &str, -) -> Result, GraphError> { +) -> Result, GraphFolderError> { let file = match File::open(base_path.join(file_name)) { Ok(file) => file, Err(error) => { @@ -69,7 +72,7 @@ pub fn make_path_pointer( base_path: &Path, file_name: &str, prefix: &str, -) -> Result { +) -> Result { let mut id = read_path_pointer(base_path, file_name, prefix)? .and_then(|path| { path.strip_prefix(prefix) @@ -89,11 +92,13 @@ pub fn read_or_default_path_pointer( base_path: &Path, file_name: &str, prefix: &str, -) -> Result { +) -> Result { Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) } -pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { +pub fn get_zip_data_path( + zip: &mut ZipArchive, +) -> Result { let file = zip.by_name(ROOT_META_PATH)?; Ok(read_path_from_file(file, DATA_PATH)?) } @@ -109,14 +114,16 @@ pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result( zip: &mut ZipArchive, mut data_path: String, -) -> Result { +) -> Result { data_path.push('/'); data_path.push_str(GRAPH_META_PATH); let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; Ok(graph_path) } -pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { +pub fn get_zip_meta_path( + zip: &mut ZipArchive, +) -> Result { let mut path = get_zip_data_path(zip)?; path.push('/'); path.push_str(GRAPH_META_PATH); @@ -135,31 +142,31 @@ pub trait GraphPaths { self.root().join(ROOT_META_PATH) } - fn data_path(&self) -> Result { + fn data_path(&self) -> Result { Ok(InnerGraphFolder { path: self.root().join(self.relative_data_path()?), }) } - fn vectors_path(&self) -> Result { + fn vectors_path(&self) -> Result { let mut path = self.data_path()?.path; path.push(VECTORS_PATH); Ok(path) } - fn index_path(&self) -> Result { + fn index_path(&self) -> Result { let mut path = self.data_path()?.path; path.push(INDEX_PATH); Ok(path) } - fn graph_path(&self) -> Result { + fn 
graph_path(&self) -> Result { let mut path = self.data_path()?.path; path.push(self.relative_graph_path()?); Ok(path) } - fn meta_path(&self) -> Result { + fn meta_path(&self) -> Result { let mut path = self.data_path()?.path; path.push(GRAPH_META_PATH); Ok(path) @@ -169,17 +176,17 @@ pub trait GraphPaths { self.root().is_file() } - fn read_zip(&self) -> Result, GraphError> { + fn read_zip(&self) -> Result, GraphFolderError> { if self.is_zip() { let file = File::open(self.root())?; let archive = ZipArchive::new(file)?; Ok(archive) } else { - Err(GraphError::NotAZip) + Err(GraphFolderError::NotAZip) } } - fn relative_data_path(&self) -> Result { + fn relative_data_path(&self) -> Result { let path = if self.is_zip() { let mut zip = self.read_zip()?; get_zip_data_path(&mut zip)? @@ -189,7 +196,7 @@ pub trait GraphPaths { Ok(path) } - fn relative_graph_path(&self) -> Result { + fn relative_graph_path(&self) -> Result { if self.is_zip() { let mut zip = self.read_zip()?; let data_path = get_zip_data_path(&mut zip)?; @@ -200,7 +207,7 @@ pub trait GraphPaths { } } - fn read_metadata(&self) -> Result { + fn read_metadata(&self) -> Result { let mut json = String::new(); if self.is_zip() { let mut zip = self.read_zip()?; @@ -215,13 +222,7 @@ pub trait GraphPaths { Ok(metadata.meta) } - fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { - let graph_path = self.relative_graph_path()?; - let metadata = build_graph_metadata(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; + fn write_metadata(&self, meta: Metadata) -> Result<(), GraphFolderError> { meta.write_atomic(self.data_path()?.as_ref(), self.meta_path()?.as_ref())?; Ok(()) } @@ -232,11 +233,11 @@ pub trait GraphPaths { } /// Initialise the data folder and metadata pointer - fn init(&self) -> Result<(), GraphError> { + fn init(&self) -> Result<(), GraphFolderError> { if self.root().is_dir() { let non_empty = self.root().read_dir()?.next().is_some(); if non_empty { - return 
Err(GraphError::NonEmptyGraphFolder(self.root().into())); + return Err(GraphFolderError::NonEmptyGraphFolder(self.root().into())); } } else { fs::create_dir_all(self.root())? @@ -379,11 +380,13 @@ impl GraphFolder { } } - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + fn ensure_clean_root_dir(&self) -> Result<(), GraphFolderError> { if self.root_folder.exists() { let non_empty = self.root_folder.read_dir()?.next().is_some(); if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); + return Err(GraphFolderError::NonEmptyGraphFolder( + self.root_folder.clone(), + )); } } else { fs::create_dir(&self.root_folder)? @@ -453,19 +456,19 @@ impl GraphPaths for WriteableGraphFolder { &self.path } - fn relative_data_path(&self) -> Result { + fn relative_data_path(&self) -> Result { let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? - .ok_or(GraphError::NoWriteInProgress)?; + .ok_or(GraphFolderError::NoWriteInProgress)?; Ok(path) } - fn relative_graph_path(&self) -> Result { + fn relative_graph_path(&self) -> Result { let path = read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; Ok(path) } - fn init(&self) -> Result<(), GraphError> { + fn init(&self) -> Result<(), GraphFolderError> { Ok(()) } } @@ -571,11 +574,13 @@ impl InnerGraphFolder { Ok(self.path.join(self.relative_graph_path()?)) } - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + fn ensure_clean_root_dir(&self) -> Result<(), GraphFolderError> { if self.as_ref().exists() { let non_empty = self.as_ref().read_dir()?.next().is_some(); if non_empty { - return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); + return Err(GraphFolderError::NonEmptyGraphFolder( + self.as_ref().to_path_buf(), + )); } } else { fs::create_dir_all(self)? 
diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index d7a59976e7..2773ff1bcb 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -172,7 +172,11 @@ impl StableDecode for T { )?; //TODO: graph.load_index_from_zip(&mut reader, prefix) - target.write_metadata(&graph)?; + let meta = Metadata { + path: target.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + target.write_metadata(meta)?; Ok(graph) } @@ -208,7 +212,11 @@ impl StableDecode for T { config, )?; } - target.write_metadata(&graph)?; + let meta = Metadata { + path: target.relative_graph_path()?, + meta: build_graph_metadata(&graph), + }; + target.write_metadata(meta)?; Ok(graph) } } From a25472e45aa2604ee8f9bb6d175f1d021dea4168 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 26 Jun 2026 01:51:13 -0400 Subject: [PATCH 15/20] Prepare rest of graph_folder.rs file for move to raphtory-api. Change error types. Update InnerGraph to be ready (write_metadata doesn't take GraphView). replace_graph is now replace_graph_path and updates the metadata/removes the old dir. Encoding the new graph's data is handled separately in replace_graph_in_folder and stays in raphtory. 
--- Cargo.lock | 1 + raphtory-api/Cargo.toml | 3 +- raphtory-api/src/core/storage/graph_folder.rs | 4 + raphtory-graphql/src/client/error.rs | 4 + raphtory-graphql/src/paths.rs | 8 +- .../src/graph/nodes/node_storage_ops.rs | 2 +- raphtory-tests/tests/graph_index.rs | 7 +- raphtory/src/errors.rs | 3 - raphtory/src/serialise/graph_folder.rs | 80 +++++++------------ raphtory/src/serialise/metadata.rs | 29 ++++++- raphtory/src/serialise/serialise.rs | 7 +- 11 files changed, 82 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76e1f9dc3b..c80c067b27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6361,6 +6361,7 @@ dependencies = [ "tracing", "tracing-subscriber", "twox-hash", + "walkdir", "zip", ] diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 68cea8778e..253c669963 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -18,6 +18,7 @@ edition.workspace = true serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, optional = true } zip = { workspace = true, optional = true } +walkdir = { workspace = true, optional = true } bigdecimal = { workspace = true, features = ["string-only"] } thiserror = { workspace = true } bytemuck = { workspace = true } @@ -60,4 +61,4 @@ python = [ vectors = [] template = ["dep:minijinja"] search = [] -io = ["dep:serde_json", "dep:zip"] +io = ["dep:serde_json", "dep:zip", "dep:walkdir"] diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index ce0df28bbd..245f9ebc53 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -69,4 +69,8 @@ pub enum GraphFolderError { NonEmptyGraphFolder(PathBuf), #[error("Graph folder is not initialised for writing")] NoWriteInProgress, + #[error("Cannot swap zipped graph data")] + ZippedGraphCannotBeSwapped, + #[error("IO operation failed: {0}")] + IOErrorMsg(String), } diff --git 
a/raphtory-graphql/src/client/error.rs b/raphtory-graphql/src/client/error.rs index e58f38a9c3..c4b2d16ce6 100644 --- a/raphtory-graphql/src/client/error.rs +++ b/raphtory-graphql/src/client/error.rs @@ -1,5 +1,6 @@ //! Error type for the GraphQL client. +use raphtory_api::core::storage::graph_folder::GraphFolderError; use thiserror::Error; #[derive(Error, Debug)] @@ -27,4 +28,7 @@ pub enum ClientError { #[error("The request did not succeed.")] UnsuccessfulResponse, + + #[error(transparent)] + GraphFolder(#[from] GraphFolderError), } diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 411a60ef5d..76f7f22062 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -8,8 +8,8 @@ use raphtory::{ errors::{GraphError, InvalidPathReason}, prelude::{AdditionOps, GraphViewOps}, serialise::{ - metadata::build_graph_metadata, GraphFolder, GraphPaths, RelativePath, StableDecode, - WriteableGraphFolder, + metadata::{build_graph_metadata, replace_graph_in_folder}, + GraphFolder, GraphPaths, RelativePath, StableDecode, WriteableGraphFolder, }, }; use raphtory_api::core::storage::graph_folder::{ @@ -170,7 +170,7 @@ impl ExistingGraphFolder { }; self.global_path.write_metadata(meta)?; } else { - self.global_path.data_path()?.replace_graph(graph)?; + replace_graph_in_folder(&self.global_path.data_path()?, graph)?; } Ok(()) }) @@ -402,7 +402,7 @@ impl ValidWriteableGraphFolder { (true, new_graph) } } else { - self.global_path.data_path()?.replace_graph(graph.clone())?; + replace_graph_in_folder(&self.global_path.data_path()?, graph.clone())?; (false, graph) }; Ok(is_dirty) diff --git a/raphtory-storage/src/graph/nodes/node_storage_ops.rs b/raphtory-storage/src/graph/nodes/node_storage_ops.rs index 6d9c88aa1c..6ffdb7c159 100644 --- a/raphtory-storage/src/graph/nodes/node_storage_ops.rs +++ b/raphtory-storage/src/graph/nodes/node_storage_ops.rs @@ -2,7 +2,7 @@ use raphtory_api::core::{ entities::{ edges::edge_ref::EdgeRef, 
properties::{ - meta::{STATIC_GRAPH_LAYER, STATIC_GRAPH_LAYER_ID, STATIC_GRAPH_LAYER_NAME}, + meta::{STATIC_GRAPH_LAYER, STATIC_GRAPH_LAYER_ID}, prop::Prop, }, GidRef, LayerId, LayerIds, VID, diff --git a/raphtory-tests/tests/graph_index.rs b/raphtory-tests/tests/graph_index.rs index e2ac5fad40..3e6f233ea0 100644 --- a/raphtory-tests/tests/graph_index.rs +++ b/raphtory-tests/tests/graph_index.rs @@ -142,7 +142,8 @@ mod test_index { serialise::GraphFolder, }; use raphtory_api::core::{ - entities::properties::prop::Prop, storage::arc_str::ArcStr, + entities::properties::prop::Prop, + storage::{arc_str::ArcStr, graph_folder::GraphFolderError}, utils::logging::global_info_logger, }; use tempfile::TempDir; @@ -226,9 +227,7 @@ mod test_index { let result = graph.encode(path); match result { - Err(GraphError::GraphFolder(err)) - if matches!(err, GraphFolderError::NonEmptyGraphFolder(err_path)) => - { + Err(GraphError::GraphFolder(GraphFolderError::NonEmptyGraphFolder(err_path))) => { assert_eq!(path, err_path); } Ok(_) => panic!("Expected error on second encode, got Ok"), diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index d6355a3d59..f020d54c80 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -420,9 +420,6 @@ pub enum GraphError { #[error("Your window and step must be of the same type: duration (string) or epoch (int)")] MismatchedIntervalTypes, - #[error("Cannot swap zipped graph data")] - ZippedGraphCannotBeSwapped, - #[error("{source} at {location}")] StripPrefixError { source: StripPrefixError, diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 1dad909553..073ac94311 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -10,10 +10,6 @@ //! ├── index/ # Search indexes (optional) //! 
└── vectors/ # Vector embeddings (optional) -use crate::{ - db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, - serialise::metadata::build_graph_metadata, -}; use itertools::Itertools; use raphtory_api::core::{ input::input_node::parse_u64_strict, @@ -103,7 +99,9 @@ pub fn get_zip_data_path( Ok(read_path_from_file(file, DATA_PATH)?) } -pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { +pub fn get_zip_graph_path( + zip: &mut ZipArchive, +) -> Result { let mut path = get_zip_data_path(zip)?; let graph_path = get_zip_graph_path_name(zip, path.clone())?; path.push('/'); @@ -268,7 +266,7 @@ impl + ?Sized> GraphPaths for P { #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct GraphFolder { root_folder: PathBuf, - pub(crate) write_as_zip_format: bool, + pub write_as_zip_format: bool, } impl GraphPaths for GraphFolder { @@ -288,9 +286,9 @@ impl GraphFolder { /// Reserve a folder, marking it as occupied by a graph. /// Returns an error if the folder has data. - pub fn init_write(self) -> Result { + pub fn init_write(self) -> Result { if self.write_as_zip_format { - return Err(GraphError::ZippedGraphCannotBeSwapped); + return Err(GraphFolderError::ZippedGraphCannotBeSwapped); } let relative_data_path = self.relative_data_path()?; let meta = serde_json::to_string(&RelativePath { @@ -311,9 +309,9 @@ impl GraphFolder { /// /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and /// the contents of the corresponding folder are deleted. - pub fn init_swap(self) -> Result { + pub fn init_swap(self) -> Result { if self.write_as_zip_format { - return Err(GraphError::ZippedGraphCannotBeSwapped); + return Err(GraphFolderError::ZippedGraphCannotBeSwapped); } let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { Ok(path) => path, @@ -353,9 +351,9 @@ impl GraphFolder { } /// Clears the folder of any contents. 
- pub fn clear(&self) -> Result<(), GraphError> { + pub fn clear(&self) -> Result<(), GraphFolderError> { if self.is_zip() { - return Err(GraphError::IOErrorMsg( + return Err(GraphFolderError::IOErrorMsg( "Cannot clear a zip folder".to_string(), )); } @@ -365,7 +363,7 @@ impl GraphFolder { Ok(()) } - pub fn get_zip_graph_prefix(&self) -> Result { + pub fn get_zip_graph_prefix(&self) -> Result { if self.is_zip() { let mut zip = self.read_zip()?; Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) @@ -395,13 +393,13 @@ impl GraphFolder { Ok(()) } - pub fn is_disk_graph(&self) -> Result { + pub fn is_disk_graph(&self) -> Result { let meta = self.read_metadata()?; Ok(meta.is_diskgraph) } /// Creates a zip file from the folder. - pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { + pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphFolderError> { if self.is_zip() { let mut reader = File::open(&self.root_folder)?; io::copy(&mut reader, &mut writer)?; @@ -413,7 +411,7 @@ impl GraphFolder { { let path = entry.path(); let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) + GraphFolderError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) })?; let zip_entry_name = rel_path @@ -437,7 +435,7 @@ impl GraphFolder { Ok(()) } - pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphFolderError> { self.ensure_clean_root_dir()?; let mut archive = ZipArchive::new(reader)?; archive.extract(self.root())?; @@ -478,7 +476,7 @@ impl WriteableGraphFolder { /// and cleaning up any old data if it exists. /// /// This operation returns an error if there is no write in progress. 
- pub fn finish(self) -> Result { + pub fn finish(self) -> Result { let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; fs::rename( self.root().join(DIRTY_PATH), @@ -509,18 +507,12 @@ impl AsRef for InnerGraphFolder { } impl InnerGraphFolder { - pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { - let graph_path = self.relative_graph_path()?; - let metadata = build_graph_metadata(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; + pub fn write_metadata(&self, meta: Metadata) -> Result<(), GraphFolderError> { meta.write_atomic(self.as_ref(), &self.meta_path())?; Ok(()) } - pub fn read_metadata(&self) -> Result { + pub fn read_metadata(&self) -> Result { let mut json = String::new(); let mut file = File::open(self.meta_path())?; file.read_to_string(&mut json)?; @@ -528,28 +520,18 @@ impl InnerGraphFolder { Ok(metadata.meta) } - pub fn replace_graph( - &self, - graph: impl ParquetEncoder + GraphView + std::fmt::Debug, - ) -> Result<(), GraphError> { - let data_path = self.as_ref(); + /// Atomically point the metadata file at the graph data described by `meta`, removing the + /// previously-referenced graph directory if the path changed. + /// + /// NOTE: this does NOT encode the graph data itself. 
The caller must have already written + /// the graph data into the directory in `meta.path` (see the `replace_graph` in `raphtory`) + pub fn replace_graph_path(&self, meta: Metadata) -> Result<(), GraphFolderError> { let old_relative_graph_path = self.relative_graph_path()?; - let old_graph_path = self.path.join(&old_relative_graph_path); - let meta = build_graph_metadata(&graph); - let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; - graph.encode_parquet(data_path.join(&new_relative_graph_path))?; + let path_changed = meta.path != old_relative_graph_path; - let dirty_path = data_path.join(DIRTY_PATH); - fs::write( - &dirty_path, - &serde_json::to_vec(&Metadata { - path: new_relative_graph_path.clone(), - meta, - })?, - )?; - fs::rename(&dirty_path, data_path.join(GRAPH_META_PATH))?; - if new_relative_graph_path != old_relative_graph_path { - fs::remove_dir_all(old_graph_path)?; + self.write_metadata(meta)?; + if path_changed { + fs::remove_dir_all(self.as_ref().join(&old_relative_graph_path))?; } Ok(()) } @@ -565,12 +547,12 @@ impl InnerGraphFolder { self.path.join(GRAPH_META_PATH) } - pub fn relative_graph_path(&self) -> Result { + pub fn relative_graph_path(&self) -> Result { let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; Ok(relative) } - pub fn graph_path(&self) -> Result { + pub fn graph_path(&self) -> Result { Ok(self.path.join(self.relative_graph_path()?)) } @@ -589,7 +571,7 @@ impl InnerGraphFolder { } /// Extracts a zip file to the folder. 
- pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphFolderError> { self.ensure_clean_root_dir()?; let mut zip = ZipArchive::new(reader)?; diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 5a5ada975f..464d1d6098 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -1,9 +1,12 @@ use crate::{ db::api::view::internal::GraphView, - prelude::GraphViewOps, - serialise::{GraphFolder, GraphPaths}, + errors::GraphError, + prelude::{GraphViewOps, ParquetEncoder}, + serialise::{make_path_pointer, GraphFolder, GraphPaths, InnerGraphFolder}, +}; +use raphtory_api::core::storage::graph_folder::{ + GraphMetadata, Metadata, GRAPH_META_PATH, GRAPH_PATH, }; -use raphtory_api::core::storage::graph_folder::GraphMetadata; /// Build the [`GraphMetadata`] summary for a graph pub fn build_graph_metadata(graph: impl GraphView) -> GraphMetadata { @@ -15,6 +18,26 @@ pub fn build_graph_metadata(graph: impl GraphView) -> GraphMetadata { } } +/// Encode `graph`'s data into a fresh directory inside `folder` and atomically point the folder's +/// metadata at it, deleting any previously-stored graph data. 
+/// +/// The graph-data encoding lives in `raphtory` because the `ParquetEncoder` / +/// `GraphView` traits aren't available in `raphtory-api` +pub fn replace_graph_in_folder( + folder: &InnerGraphFolder, + graph: impl ParquetEncoder + GraphView + std::fmt::Debug, +) -> Result<(), GraphError> { + let data_path = folder.as_ref(); + let new_relative_graph_path = make_path_pointer(data_path, GRAPH_META_PATH, GRAPH_PATH)?; + graph.encode_parquet(data_path.join(&new_relative_graph_path))?; + let meta = Metadata { + path: new_relative_graph_path, + meta: build_graph_metadata(&graph), + }; + folder.replace_graph_path(meta)?; + Ok(()) +} + pub fn assert_metadata_correct<'graph>(folder: &GraphFolder, graph: &impl GraphViewOps<'graph>) { let metadata = folder.read_metadata().unwrap(); assert_eq!(metadata.node_count, graph.count_nodes()); diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 2773ff1bcb..cc99e1635f 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -72,7 +72,12 @@ impl StableEncode for T { self.encode_parquet(write_folder.graph_path()?)?; #[cfg(feature = "search")] self.persist_index_to_disk(&write_folder)?; - write_folder.data_path()?.write_metadata(self)?; + let data_folder = write_folder.data_path()?; + let meta = Metadata { + path: data_folder.relative_graph_path()?, + meta: build_graph_metadata(self), + }; + data_folder.write_metadata(meta)?; write_folder.finish()?; } Ok(()) From 45532a6e33c4b4fa56b3c1de9acc6b8d93c4aa21 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 26 Jun 2026 03:53:43 -0400 Subject: [PATCH 16/20] Move contents of graph_folder.rs in raphtory over to graph_folder.rs in raphtory-api. 
--- raphtory-api/src/core/storage/graph_folder.rs | 612 ++++++++++++++++- .../src/client/raphtory_client.rs | 3 +- raphtory-graphql/src/data.rs | 4 +- raphtory-graphql/src/graph.rs | 2 +- raphtory-graphql/src/lib.rs | 6 +- .../src/model/graph/meta_graph.rs | 4 +- raphtory-graphql/src/paths.rs | 5 +- raphtory-graphql/src/url_encode.rs | 2 +- raphtory-tests/tests/graph_index.rs | 9 +- raphtory-tests/tests/serialise_test.rs | 4 +- raphtory/src/db/api/mutation/index_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 4 +- raphtory/src/db/api/view/graph.rs | 4 +- .../src/db/api/view/internal/materialize.rs | 4 +- raphtory/src/db/graph/graph.rs | 4 +- raphtory/src/db/graph/views/deletion_graph.rs | 4 +- .../types/macros/trait_impl/serialise.rs | 2 +- raphtory/src/search/graph_index.rs | 9 +- raphtory/src/serialise/graph_folder.rs | 620 +----------------- raphtory/src/serialise/metadata.rs | 4 +- raphtory/src/serialise/parquet.rs | 8 +- raphtory/src/serialise/serialise.rs | 5 +- 22 files changed, 666 insertions(+), 655 deletions(-) diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index 245f9ebc53..ef00cd66c7 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -1,12 +1,31 @@ -//! const vars for file and directory names regarding exported graphs. +//! Raphtory container format for managing graph data. +//! +//! Folder structure: +//! +//! GraphFolder +//! ├── .raph # Metadata file (json: {path: "data{id}"}) pointing at the current data folder +//! └── data{id}/ # Data folder (incremental id for atomic replacement) +//! ├── .meta # Metadata file (json: {path: "graph{id}", meta: {}}) pointing at the current graph folder +//! ├── graph{id}/ # Graph data (incremental id for atomic replacement) +//! ├── index/ # Search indexes (optional) +//! 
└── vectors/ # Vector embeddings (optional) -use crate::GraphType; +use crate::{core::input::input_node::parse_u64_strict, GraphType}; +use itertools::Itertools; use serde::{Deserialize, Serialize}; #[cfg(feature = "io")] use std::{ fs::{self, File}, path::{Path, PathBuf}, }; +use std::{ + io, + io::{ErrorKind, Read, Seek, Write}, +}; +#[cfg(feature = "io")] +use walkdir::WalkDir; +#[cfg(feature = "io")] +use zip::{write::FileOptions, ZipArchive, ZipWriter}; /// Metadata file that stores path to the data folder. pub const ROOT_META_PATH: &str = ".raph"; @@ -74,3 +93,592 @@ pub enum GraphFolderError { #[error("IO operation failed: {0}")] IOErrorMsg(String), } + +pub fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphFolderError> { + relative_path + .strip_prefix(prefix) // should have the prefix + .and_then(parse_u64_strict) // the remainder should be the id + .ok_or_else(|| GraphFolderError::InvalidRelativePath(relative_path.to_string()))?; + Ok(()) +} + +fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { + let mut value = String::new(); + file.read_to_string(&mut value)?; + let path: RelativePath = serde_json::from_str(&value)?; + valid_path_pointer(&path.path, prefix)?; + Ok(path.path) +} + +pub fn read_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result, GraphFolderError> { + let file = match File::open(base_path.join(file_name)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error.into()), + } + } + }; + let path = read_path_from_file(file, prefix)?; + Ok(Some(path)) +} + +pub fn make_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result { + let mut id = read_path_pointer(base_path, file_name, prefix)? 
+ .and_then(|path| { + path.strip_prefix(prefix) + .and_then(|id| id.parse::().ok()) + }) + .map_or(0, |id| id + 1); + + let mut path = format!("{prefix}{id}"); + while base_path.join(&path).exists() { + id += 1; + path = format!("{prefix}{id}"); + } + Ok(path) +} + +pub fn read_or_default_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result { + Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) +} + +pub fn get_zip_data_path( + zip: &mut ZipArchive, +) -> Result { + let file = zip.by_name(ROOT_META_PATH)?; + Ok(read_path_from_file(file, DATA_PATH)?) +} + +pub fn get_zip_graph_path( + zip: &mut ZipArchive, +) -> Result { + let mut path = get_zip_data_path(zip)?; + let graph_path = get_zip_graph_path_name(zip, path.clone())?; + path.push('/'); + path.push_str(&graph_path); + Ok(path) +} + +pub fn get_zip_graph_path_name( + zip: &mut ZipArchive, + mut data_path: String, +) -> Result { + data_path.push('/'); + data_path.push_str(GRAPH_META_PATH); + let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; + Ok(graph_path) +} + +pub fn get_zip_meta_path( + zip: &mut ZipArchive, +) -> Result { + let mut path = get_zip_data_path(zip)?; + path.push('/'); + path.push_str(GRAPH_META_PATH); + Ok(path) +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct RelativePath { + pub path: String, +} + +pub trait GraphPaths { + fn root(&self) -> &Path; + + fn root_meta_path(&self) -> PathBuf { + self.root().join(ROOT_META_PATH) + } + + fn data_path(&self) -> Result { + Ok(InnerGraphFolder { + path: self.root().join(self.relative_data_path()?), + }) + } + + fn vectors_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(VECTORS_PATH); + Ok(path) + } + + fn index_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(INDEX_PATH); + Ok(path) + } + + fn graph_path(&self) -> Result { + let mut path = self.data_path()?.path; + 
path.push(self.relative_graph_path()?); + Ok(path) + } + + fn meta_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(GRAPH_META_PATH); + Ok(path) + } + + fn is_zip(&self) -> bool { + self.root().is_file() + } + + fn read_zip(&self) -> Result, GraphFolderError> { + if self.is_zip() { + let file = File::open(self.root())?; + let archive = ZipArchive::new(file)?; + Ok(archive) + } else { + Err(GraphFolderError::NotAZip) + } + } + + fn relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = self.read_zip()?; + get_zip_data_path(&mut zip)? + } else { + read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)? + }; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.data_path()?; + read_or_default_path_pointer(data_path.as_ref(), GRAPH_META_PATH, GRAPH_PATH) + } + } + + fn read_metadata(&self) -> Result { + let mut json = String::new(); + if self.is_zip() { + let mut zip = self.read_zip()?; + let path = get_zip_meta_path(&mut zip)?; + let mut zip_file = zip.by_name(&path)?; + zip_file.read_to_string(&mut json)?; + } else { + let mut file = File::open(self.meta_path()?)?; + file.read_to_string(&mut json)?; + } + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + fn write_metadata(&self, meta: Metadata) -> Result<(), GraphFolderError> { + meta.write_atomic(self.data_path()?.as_ref(), self.meta_path()?.as_ref())?; + Ok(()) + } + + /// Returns true if folder is occupied by a graph. 
+ fn is_reserved(&self) -> bool { + self.meta_path().map_or(false, |path| path.exists()) + } + + /// Initialise the data folder and metadata pointer + fn init(&self) -> Result<(), GraphFolderError> { + if self.root().is_dir() { + let non_empty = self.root().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphFolderError::NonEmptyGraphFolder(self.root().into())); + } + } else { + fs::create_dir_all(self.root())? + } + + // Create the data folder and have the root metadata file point to it. + let data_path = self.relative_data_path()?; + fs::create_dir(self.root().join(&data_path))?; + fs::write( + self.root_meta_path(), + serde_json::to_string(&RelativePath { path: data_path })?, + )?; + + // Create the graph folder inside the data folder. + let graph_path = self.graph_path()?; + fs::create_dir(&graph_path)?; + + Ok(()) + } +} + +impl + ?Sized> GraphPaths for P { + fn root(&self) -> &Path { + self.as_ref() + } +} + +#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] +pub struct GraphFolder { + root_folder: PathBuf, + pub write_as_zip_format: bool, +} + +impl GraphPaths for GraphFolder { + fn root(&self) -> &Path { + &self.root_folder + } +} + +impl GraphFolder { + pub fn new_as_zip(path: impl AsRef) -> Self { + let folder: GraphFolder = path.into(); + Self { + write_as_zip_format: true, + ..folder + } + } + + /// Reserve a folder, marking it as occupied by a graph. + /// Returns an error if the folder has data. 
+ pub fn init_write(self) -> Result { + if self.write_as_zip_format { + return Err(GraphFolderError::ZippedGraphCannotBeSwapped); + } + let relative_data_path = self.relative_data_path()?; + let meta = serde_json::to_string(&RelativePath { + path: relative_data_path.clone(), + })?; + self.ensure_clean_root_dir()?; + let metapath = self.root_folder.join(DIRTY_PATH); + let mut path_file = File::create_new(&metapath)?; + path_file.write_all(meta.as_bytes())?; + fs::create_dir_all(self.root_folder.join(relative_data_path))?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } + + /// Prepare a graph folder for atomically swapping the data contents. + /// This returns an error if the folder is set to write as Zip. + /// + /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and + /// the contents of the corresponding folder are deleted. + pub fn init_swap(self) -> Result { + if self.write_as_zip_format { + return Err(GraphFolderError::ZippedGraphCannotBeSwapped); + } + let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { + Ok(path) => path, + Err(_) => { + fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up + None + } + }; + + fs::create_dir_all(self.root())?; + + let swap_path = match old_swap { + Some(relative_path) => { + let swap_path = self.root_folder.join(relative_path); + if swap_path.exists() { + fs::remove_dir_all(&swap_path)?; + } + swap_path + } + None => { + let new_relative_data_path = + make_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + let new_data_path = self.root_folder.join(&new_relative_data_path); + let meta = serde_json::to_string(&RelativePath { + path: new_relative_data_path, + })?; + let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; + dirty_file.write_all(meta.as_bytes())?; + dirty_file.sync_all()?; + new_data_path + } + }; + fs::create_dir_all(swap_path)?; + Ok(WriteableGraphFolder { + path: 
self.root_folder, + }) + } + + /// Clears the folder of any contents. + pub fn clear(&self) -> Result<(), GraphFolderError> { + if self.is_zip() { + return Err(GraphFolderError::IOErrorMsg( + "Cannot clear a zip folder".to_string(), + )); + } + + fs::remove_dir_all(&self.root_folder)?; + fs::create_dir_all(&self.root_folder)?; + Ok(()) + } + + pub fn get_zip_graph_prefix(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) + } else { + let data_path = read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + let graph_path = read_or_default_path_pointer( + &self.root().join(&data_path), + GRAPH_META_PATH, + GRAPH_PATH, + )?; + Ok([data_path, graph_path].join("/")) + } + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphFolderError> { + if self.root_folder.exists() { + let non_empty = self.root_folder.read_dir()?.next().is_some(); + if non_empty { + return Err(GraphFolderError::NonEmptyGraphFolder( + self.root_folder.clone(), + )); + } + } else { + fs::create_dir(&self.root_folder)? + } + + Ok(()) + } + + pub fn is_disk_graph(&self) -> Result { + let meta = self.read_metadata()?; + Ok(meta.is_diskgraph) + } + + /// Creates a zip file from the folder. 
+ pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphFolderError> { + if self.is_zip() { + let mut reader = File::open(&self.root_folder)?; + io::copy(&mut reader, &mut writer)?; + } else { + let mut zip = ZipWriter::new(writer); + for entry in WalkDir::new(&self.root_folder) + .into_iter() + .filter_map(Result::ok) + { + let path = entry.path(); + let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { + GraphFolderError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) + })?; + + let zip_entry_name = rel_path + .components() + .map(|name| name.as_os_str().to_string_lossy()) + .join("/"); + + if path.is_file() { + zip.start_file::<_, ()>(zip_entry_name, FileOptions::default())?; + + let mut file = File::open(path)?; + std::io::copy(&mut file, &mut zip)?; + } else if path.is_dir() && !zip_entry_name.is_empty() { + // Add empty directories to the zip + zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; + } + } + + zip.finish()?; + } + Ok(()) + } + + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphFolderError> { + self.ensure_clean_root_dir()?; + let mut archive = ZipArchive::new(reader)?; + archive.extract(self.root())?; + Ok(()) + } +} + +#[must_use] +#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +pub struct WriteableGraphFolder { + path: PathBuf, +} + +impl GraphPaths for WriteableGraphFolder { + fn root(&self) -> &Path { + &self.path + } + + fn relative_data_path(&self) -> Result { + let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? 
+ .ok_or(GraphFolderError::NoWriteInProgress)?; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + let path = + read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; + Ok(path) + } + + fn init(&self) -> Result<(), GraphFolderError> { + Ok(()) + } +} + +impl WriteableGraphFolder { + /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' + /// and cleaning up any old data if it exists. + /// + /// This operation returns an error if there is no write in progress. + pub fn finish(self) -> Result { + let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; + fs::rename( + self.root().join(DIRTY_PATH), + self.root().join(ROOT_META_PATH), + )?; + if let Some(old_data) = old_data { + let old_data_path = self.root().join(old_data); + if old_data_path.is_dir() { + fs::remove_dir_all(old_data_path)?; + } + } + Ok(GraphFolder { + root_folder: self.path, + write_as_zip_format: false, + }) + } +} + +#[derive(Clone, Debug)] +pub struct InnerGraphFolder { + path: PathBuf, +} + +impl AsRef for InnerGraphFolder { + fn as_ref(&self) -> &Path { + &self.path + } +} + +impl InnerGraphFolder { + pub fn write_metadata(&self, meta: Metadata) -> Result<(), GraphFolderError> { + meta.write_atomic(self.as_ref(), &self.meta_path())?; + Ok(()) + } + + pub fn read_metadata(&self) -> Result { + let mut json = String::new(); + let mut file = File::open(self.meta_path())?; + file.read_to_string(&mut json)?; + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + /// Atomically point the metadata file at the graph data described by `meta`, removing the + /// previously-referenced graph directory if the path changed. + /// + /// NOTE: this does NOT encode the graph data itself. 
The caller must have already written + /// the graph data into the directory in `meta.path` (see `replace_graph_in_folder` in `raphtory`) + pub fn replace_graph_path(&self, meta: Metadata) -> Result<(), GraphFolderError> { + let old_relative_graph_path = self.relative_graph_path()?; + let path_changed = meta.path != old_relative_graph_path; + + self.write_metadata(meta)?; + if path_changed { + fs::remove_dir_all(self.as_ref().join(&old_relative_graph_path))?; + } + Ok(()) + } + pub fn vectors_path(&self) -> PathBuf { + self.path.join(VECTORS_PATH) + } + + pub fn index_path(&self) -> PathBuf { + self.path.join(INDEX_PATH) + } + + pub fn meta_path(&self) -> PathBuf { + self.path.join(GRAPH_META_PATH) + } + + pub fn relative_graph_path(&self) -> Result { + let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; + Ok(relative) + } + + pub fn graph_path(&self) -> Result { + Ok(self.path.join(self.relative_graph_path()?)) + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphFolderError> { + if self.as_ref().exists() { + let non_empty = self.as_ref().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphFolderError::NonEmptyGraphFolder( + self.as_ref().to_path_buf(), + )); + } + } else { + fs::create_dir_all(self)? + } + Ok(()) + } + + /// Extracts a zip file to the folder. 
+ pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphFolderError> { + self.ensure_clean_root_dir()?; + + let mut zip = ZipArchive::new(reader)?; + let data_dir = get_zip_data_path(&mut zip)?; + + for i in 0..zip.len() { + let mut file = zip.by_index(i)?; + let zip_entry_name = match file.enclosed_name() { + Some(name) => name, + None => continue, + }; + if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { + let out_path = self.as_ref().join(inner_path); + if file.is_dir() { + std::fs::create_dir_all(&out_path)?; + } else { + // Create any parent directories + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } + + let mut out_file = std::fs::File::create(&out_path)?; + std::io::copy(&mut file, &mut out_file)?; + } + } + } + + Ok(()) + } +} + +impl> From

for GraphFolder { + fn from(value: P) -> Self { + let path: &Path = value.as_ref(); + Self { + root_folder: path.to_path_buf(), + write_as_zip_format: false, + } + } +} + +impl From<&GraphFolder> for GraphFolder { + fn from(value: &GraphFolder) -> Self { + value.clone() + } +} diff --git a/raphtory-graphql/src/client/raphtory_client.rs b/raphtory-graphql/src/client/raphtory_client.rs index c8af4c36ae..6280fda1e6 100644 --- a/raphtory-graphql/src/client/raphtory_client.rs +++ b/raphtory-graphql/src/client/raphtory_client.rs @@ -2,7 +2,8 @@ use crate::{ client::{ClientError, GraphQLRemoteGraph}, url_encode::url_decode_graph, }; -use raphtory::{db::api::view::MaterializedGraph, prelude::Config, serialise::GraphFolder}; +use raphtory::{db::api::view::MaterializedGraph, prelude::Config}; +use raphtory_api::core::storage::graph_folder::GraphFolder; use reqwest::{multipart, multipart::Part, Client}; use serde_json::{json, Value as JsonValue}; use std::{collections::HashMap, io::Cursor}; diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 5d950372e3..cbe26db4c7 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -32,12 +32,12 @@ use raphtory::{ }, errors::GraphError, prelude::AdditionOps, - serialise::GraphPaths, vectors::{ cache::CachedEmbeddingModel, storage::LazyDiskVectorCache, template::DocumentTemplate, vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, }, }; +use raphtory_api::core::storage::graph_folder::GraphPaths; use std::{ fs, io, io::{Read, Seek}, @@ -771,8 +771,8 @@ pub(crate) mod data_tests { use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, prelude::*, - serialise::GraphPaths, }; + use raphtory_api::core::storage::graph_folder::GraphPaths; use std::{collections::HashMap, fs, path::Path, time::Duration}; use tokio::time::sleep; diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index abe8a4217f..0df9524eaa 100644 --- 
a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -20,9 +20,9 @@ use raphtory::{ }, errors::{GraphError, GraphResult}, prelude::{EdgeViewOps, StableDecode}, - serialise::GraphPaths, vectors::{storage::LazyDiskVectorCache, vectorised_graph::VectorisedGraph}, }; +use raphtory_api::core::storage::graph_folder::GraphPaths; use raphtory_storage::{ core_ops::InheritCoreGraphOps, layer_ops::InheritLayerOps, mutation::InheritMutationOps, }; diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 1a3f2acfde..8d74e5b71a 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -72,9 +72,11 @@ mod graphql_test { graph::views::deletion_graph::PersistentGraph, }, prelude::*, - serialise::GraphFolder, }; - use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; + use raphtory_api::core::{ + entities::GID, + storage::{arc_str::ArcStr, graph_folder::GraphFolder}, + }; use serde_json::{json, Value}; use std::{ collections::{HashMap, HashSet}, diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 26c54db647..987abeaef3 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -9,9 +9,9 @@ use raphtory::{ db::api::storage::storage::read_constant_graph_properties, errors::GraphError, prelude::{GraphViewOps, PropertiesOps}, - serialise::{parquet::decode_graph_metadata, GraphPaths}, + serialise::parquet::decode_graph_metadata, }; -use raphtory_api::core::storage::graph_folder::GraphMetadata; +use raphtory_api::core::storage::graph_folder::{GraphMetadata, GraphPaths}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 76f7f22062..87ae66c9db 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -9,11 +9,12 @@ use raphtory::{ prelude::{AdditionOps, GraphViewOps}, serialise::{ 
metadata::{build_graph_metadata, replace_graph_in_folder}, - GraphFolder, GraphPaths, RelativePath, StableDecode, WriteableGraphFolder, + StableDecode, }, }; use raphtory_api::core::storage::graph_folder::{ - GraphFolderError, GraphMetadata, Metadata, DIRTY_PATH, ROOT_META_PATH, + GraphFolder, GraphFolderError, GraphMetadata, GraphPaths, Metadata, RelativePath, + WriteableGraphFolder, DIRTY_PATH, ROOT_META_PATH, }; use std::{ cmp::Ordering, diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 00009af1df..1d9ad749db 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -6,8 +6,8 @@ use raphtory::{ }, errors::GraphError, prelude::{StableDecode, StableEncode}, - serialise::GraphPaths, }; +use raphtory_api::core::storage::graph_folder::GraphPaths; #[derive(thiserror::Error, Debug)] pub enum UrlDecodeError { diff --git a/raphtory-tests/tests/graph_index.rs b/raphtory-tests/tests/graph_index.rs index 3e6f233ea0..8a086cec36 100644 --- a/raphtory-tests/tests/graph_index.rs +++ b/raphtory-tests/tests/graph_index.rs @@ -139,11 +139,13 @@ mod test_index { }, errors::GraphError, prelude::*, - serialise::GraphFolder, }; use raphtory_api::core::{ entities::properties::prop::Prop, - storage::{arc_str::ArcStr, graph_folder::GraphFolderError}, + storage::{ + arc_str::ArcStr, + graph_folder::{GraphFolder, GraphFolderError}, + }, utils::logging::global_info_logger, }; use tempfile::TempDir; @@ -529,8 +531,9 @@ mod test_index { }, errors::GraphError, prelude::{AdditionOps, Graph, IndexMutationOps, SearchableGraphOps, StableDecode}, - serialise::{GraphFolder, StableEncode}, + serialise::StableEncode, }; + use raphtory_api::core::storage::graph_folder::GraphFolder; use raphtory_tests::assertions::{search_edges, search_nodes}; use tempfile::{tempdir, TempDir}; diff --git a/raphtory-tests/tests/serialise_test.rs b/raphtory-tests/tests/serialise_test.rs index 59fe636189..9a71beab88 100644 --- 
a/raphtory-tests/tests/serialise_test.rs +++ b/raphtory-tests/tests/serialise_test.rs @@ -9,9 +9,9 @@ mod serialise_test { graph::{graph::assert_graph_equal, views::deletion_graph::PersistentGraph}, }, prelude::*, - serialise::{metadata::assert_metadata_correct, GraphFolder}, + serialise::metadata::assert_metadata_correct, }; - use raphtory_api::core::storage::arc_str::ArcStr; + use raphtory_api::core::storage::{arc_str::ArcStr, graph_folder::GraphFolder}; use raphtory_tests::utils::{build_edge_list, build_graph_from_edge_list}; use tempfile::TempDir; diff --git a/raphtory/src/db/api/mutation/index_ops.rs b/raphtory/src/db/api/mutation/index_ops.rs index 8be7709aee..f05347f40a 100644 --- a/raphtory/src/db/api/mutation/index_ops.rs +++ b/raphtory/src/db/api/mutation/index_ops.rs @@ -2,8 +2,8 @@ use crate::{ db::api::view::{IndexSpec, IndexSpecBuilder}, errors::GraphError, prelude::AdditionOps, - serialise::{GraphFolder, GraphPaths}, }; +use raphtory_api::core::storage::graph_folder::{GraphFolder, GraphPaths}; use std::{ fs::File, io::{Seek, Write}, diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 3d88833efa..653c199e10 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -36,12 +36,13 @@ use std::{ }; use storage::wal::LSN; +#[cfg(feature = "search")] +use raphtory_api::core::storage::graph_folder::{GraphFolder, GraphPaths}; #[cfg(feature = "search")] use { crate::{ db::api::view::IndexSpec, search::graph_index::{GraphIndex, MutableGraphIndex}, - serialise::{GraphFolder, GraphPaths}, }, parking_lot::RwLock, raphtory_api::core::entities::properties::prop::IntoProp, @@ -53,7 +54,6 @@ use { tracing::info, zip::ZipWriter, }; - // Re-export for raphtory dependencies to use when creating graphs. 
pub use storage::{ persist::strategy::PersistenceStrategy, read_constant_graph_properties, Config, Extension, diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 12f435ae01..e424b4f969 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -1,5 +1,5 @@ #[cfg(feature = "io")] -use crate::serialise::{metadata::build_graph_metadata, GraphPaths}; +use crate::serialise::metadata::build_graph_metadata; use crate::{ arrow_loader::{ dataframe::{DFChunk, DFView}, @@ -44,6 +44,8 @@ use arrow::array::RecordBatch; use db4_graph::TemporalGraph; use either::Either; #[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::GraphPaths; +#[cfg(feature = "io")] use raphtory_api::core::storage::graph_folder::Metadata as GraphFolderMetadata; use raphtory_api::core::{ entities::properties::meta::{Meta, PropMapper}, diff --git a/raphtory/src/db/api/view/internal/materialize.rs b/raphtory/src/db/api/view/internal/materialize.rs index c3a0099b05..1983078d1d 100644 --- a/raphtory/src/db/api/view/internal/materialize.rs +++ b/raphtory/src/db/api/view/internal/materialize.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "io")] -use crate::serialise::GraphPaths; use crate::{ core::storage::timeindex::EventTime, db::{ @@ -9,6 +7,8 @@ use crate::{ errors::GraphError, prelude::*, }; +#[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::GraphPaths; use raphtory_api::{iter::BoxedLIter, GraphType}; use raphtory_storage::{graph::graph::GraphStorage, mutation::InheritMutationOps}; use std::ops::Range; diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index cf1b5bac75..b3042fd0d0 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -17,7 +17,7 @@ //! 
use super::views::deletion_graph::PersistentGraph; #[cfg(feature = "io")] -use crate::serialise::{metadata::build_graph_metadata, GraphPaths}; +use crate::serialise::metadata::build_graph_metadata; use crate::{ db::{ api::{ @@ -37,7 +37,7 @@ use crate::{ prelude::*, }; #[cfg(feature = "io")] -use raphtory_api::core::storage::graph_folder::Metadata as GraphFolderMetadata; +use raphtory_api::core::storage::graph_folder::{GraphPaths, Metadata as GraphFolderMetadata}; use raphtory_api::{ core::storage::{arc_str::ArcStr, timeindex::AsTime}, inherit::Base, diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index 19bcbc1fbe..ccc9f704e6 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -1,5 +1,5 @@ #[cfg(feature = "io")] -use crate::serialise::{metadata::build_graph_metadata, GraphPaths}; +use crate::serialise::metadata::build_graph_metadata; use crate::{ core::storage::timeindex::{AsTime, EventTime, TimeIndex, TimeIndexOps}, db::{ @@ -16,6 +16,8 @@ use crate::{ prelude::*, }; #[cfg(feature = "io")] +use raphtory_api::core::storage::graph_folder::GraphPaths; +#[cfg(feature = "io")] use raphtory_api::core::storage::graph_folder::Metadata; use raphtory_api::{ core::entities::properties::tprop::TPropOps, diff --git a/raphtory/src/python/types/macros/trait_impl/serialise.rs b/raphtory/src/python/types/macros/trait_impl/serialise.rs index bc0bfab9d3..c856b11073 100644 --- a/raphtory/src/python/types/macros/trait_impl/serialise.rs +++ b/raphtory/src/python/types/macros/trait_impl/serialise.rs @@ -39,7 +39,7 @@ macro_rules! 
impl_serialise { /// Returns: /// None: fn save_to_zip(&self, path: PathBuf) -> Result<(), GraphError> { - let folder = $crate::serialise::GraphFolder::new_as_zip(path); + let folder = $crate::api::core::storage::graph_folder::GraphFolder::new_as_zip(path); $crate::serialise::StableEncode::encode(&self.$field, folder) } diff --git a/raphtory/src/search/graph_index.rs b/raphtory/src/search/graph_index.rs index c0a76e71c2..2eb237a566 100644 --- a/raphtory/src/search/graph_index.rs +++ b/raphtory/src/search/graph_index.rs @@ -7,10 +7,15 @@ use crate::{ errors::GraphError, prelude::*, search::{edge_index::EdgeIndex, node_index::NodeIndex, searcher::Searcher}, - serialise::{GraphFolder, GraphPaths, InnerGraphFolder}, }; use parking_lot::RwLock; -use raphtory_api::core::{entities::LayerId, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::{ + entities::LayerId, + storage::{ + dict_mapper::MaybeNew, + graph_folder::{GraphFolder, GraphPaths, InnerGraphFolder}, + }, +}; use raphtory_storage::graph::graph::GraphStorage; use std::{ ffi::OsStr, diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 073ac94311..8cfa9ef4c1 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -1,623 +1,6 @@ -//! Raphtory container format for managing graph data. -//! -//! Folder structure: -//! -//! GraphFolder -//! ├── .raph # Metadata file (json: {path: "data{id}"}) pointing at the current data folder -//! └── data{id}/ # Data folder (incremental id for atomic replacement) -//! ├── .meta # Metadata file (json: {path: "graph{id}", meta: {}}) pointing at the current graph folder -//! ├── graph{id}/ # Graph data (incremental id for atomic replacement) -//! ├── index/ # Search indexes (optional) -//! 
└── vectors/ # Vector embeddings (optional) - use itertools::Itertools; -use raphtory_api::core::{ - input::input_node::parse_u64_strict, - storage::graph_folder::{ - GraphFolderError, GraphMetadata, Metadata, DATA_PATH, DIRTY_PATH, GRAPH_META_PATH, - GRAPH_PATH, INDEX_PATH, ROOT_META_PATH, VECTORS_PATH, - }, -}; use serde::{Deserialize, Serialize}; -use std::{ - fs::{self, File}, - io::{self, ErrorKind, Read, Seek, Write}, - path::{Path, PathBuf}, -}; -use walkdir::WalkDir; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; - -pub(crate) fn valid_path_pointer( - relative_path: &str, - prefix: &str, -) -> Result<(), GraphFolderError> { - relative_path - .strip_prefix(prefix) // should have the prefix - .and_then(parse_u64_strict) // the remainder should be the id - .ok_or_else(|| GraphFolderError::InvalidRelativePath(relative_path.to_string()))?; - Ok(()) -} - -fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { - let mut value = String::new(); - file.read_to_string(&mut value)?; - let path: RelativePath = serde_json::from_str(&value)?; - valid_path_pointer(&path.path, prefix)?; - Ok(path.path) -} - -pub fn read_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result, GraphFolderError> { - let file = match File::open(base_path.join(file_name)) { - Ok(file) => file, - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Ok(None), - _ => Err(error.into()), - } - } - }; - let path = read_path_from_file(file, prefix)?; - Ok(Some(path)) -} - -pub fn make_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result { - let mut id = read_path_pointer(base_path, file_name, prefix)? 
- .and_then(|path| { - path.strip_prefix(prefix) - .and_then(|id| id.parse::().ok()) - }) - .map_or(0, |id| id + 1); - - let mut path = format!("{prefix}{id}"); - while base_path.join(&path).exists() { - id += 1; - path = format!("{prefix}{id}"); - } - Ok(path) -} - -pub fn read_or_default_path_pointer( - base_path: &Path, - file_name: &str, - prefix: &str, -) -> Result { - Ok(read_path_pointer(base_path, file_name, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) -} - -pub fn get_zip_data_path( - zip: &mut ZipArchive, -) -> Result { - let file = zip.by_name(ROOT_META_PATH)?; - Ok(read_path_from_file(file, DATA_PATH)?) -} - -pub fn get_zip_graph_path( - zip: &mut ZipArchive, -) -> Result { - let mut path = get_zip_data_path(zip)?; - let graph_path = get_zip_graph_path_name(zip, path.clone())?; - path.push('/'); - path.push_str(&graph_path); - Ok(path) -} - -pub fn get_zip_graph_path_name( - zip: &mut ZipArchive, - mut data_path: String, -) -> Result { - data_path.push('/'); - data_path.push_str(GRAPH_META_PATH); - let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; - Ok(graph_path) -} - -pub fn get_zip_meta_path( - zip: &mut ZipArchive, -) -> Result { - let mut path = get_zip_data_path(zip)?; - path.push('/'); - path.push_str(GRAPH_META_PATH); - Ok(path) -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct RelativePath { - pub path: String, -} - -pub trait GraphPaths { - fn root(&self) -> &Path; - - fn root_meta_path(&self) -> PathBuf { - self.root().join(ROOT_META_PATH) - } - - fn data_path(&self) -> Result { - Ok(InnerGraphFolder { - path: self.root().join(self.relative_data_path()?), - }) - } - - fn vectors_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(VECTORS_PATH); - Ok(path) - } - - fn index_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(INDEX_PATH); - Ok(path) - } - - fn graph_path(&self) -> Result { - let mut path = self.data_path()?.path; - 
path.push(self.relative_graph_path()?); - Ok(path) - } - - fn meta_path(&self) -> Result { - let mut path = self.data_path()?.path; - path.push(GRAPH_META_PATH); - Ok(path) - } - - fn is_zip(&self) -> bool { - self.root().is_file() - } - - fn read_zip(&self) -> Result, GraphFolderError> { - if self.is_zip() { - let file = File::open(self.root())?; - let archive = ZipArchive::new(file)?; - Ok(archive) - } else { - Err(GraphFolderError::NotAZip) - } - } - - fn relative_data_path(&self) -> Result { - let path = if self.is_zip() { - let mut zip = self.read_zip()?; - get_zip_data_path(&mut zip)? - } else { - read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)? - }; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - let data_path = get_zip_data_path(&mut zip)?; - get_zip_graph_path_name(&mut zip, data_path) - } else { - let data_path = self.data_path()?; - read_or_default_path_pointer(data_path.as_ref(), GRAPH_META_PATH, GRAPH_PATH) - } - } - - fn read_metadata(&self) -> Result { - let mut json = String::new(); - if self.is_zip() { - let mut zip = self.read_zip()?; - let path = get_zip_meta_path(&mut zip)?; - let mut zip_file = zip.by_name(&path)?; - zip_file.read_to_string(&mut json)?; - } else { - let mut file = File::open(self.meta_path()?)?; - file.read_to_string(&mut json)?; - } - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - - fn write_metadata(&self, meta: Metadata) -> Result<(), GraphFolderError> { - meta.write_atomic(self.data_path()?.as_ref(), self.meta_path()?.as_ref())?; - Ok(()) - } - - /// Returns true if folder is occupied by a graph. 
- fn is_reserved(&self) -> bool { - self.meta_path().map_or(false, |path| path.exists()) - } - - /// Initialise the data folder and metadata pointer - fn init(&self) -> Result<(), GraphFolderError> { - if self.root().is_dir() { - let non_empty = self.root().read_dir()?.next().is_some(); - if non_empty { - return Err(GraphFolderError::NonEmptyGraphFolder(self.root().into())); - } - } else { - fs::create_dir_all(self.root())? - } - - // Create the data folder and have the root metadata file point to it. - let data_path = self.relative_data_path()?; - fs::create_dir(self.root().join(&data_path))?; - fs::write( - self.root_meta_path(), - serde_json::to_string(&RelativePath { path: data_path })?, - )?; - - // Create the graph folder inside the data folder. - let graph_path = self.graph_path()?; - fs::create_dir(&graph_path)?; - - Ok(()) - } -} - -impl + ?Sized> GraphPaths for P { - fn root(&self) -> &Path { - self.as_ref() - } -} - -#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] -pub struct GraphFolder { - root_folder: PathBuf, - pub write_as_zip_format: bool, -} - -impl GraphPaths for GraphFolder { - fn root(&self) -> &Path { - &self.root_folder - } -} - -impl GraphFolder { - pub fn new_as_zip(path: impl AsRef) -> Self { - let folder: GraphFolder = path.into(); - Self { - write_as_zip_format: true, - ..folder - } - } - - /// Reserve a folder, marking it as occupied by a graph. - /// Returns an error if the folder has data. 
- pub fn init_write(self) -> Result { - if self.write_as_zip_format { - return Err(GraphFolderError::ZippedGraphCannotBeSwapped); - } - let relative_data_path = self.relative_data_path()?; - let meta = serde_json::to_string(&RelativePath { - path: relative_data_path.clone(), - })?; - self.ensure_clean_root_dir()?; - let metapath = self.root_folder.join(DIRTY_PATH); - let mut path_file = File::create_new(&metapath)?; - path_file.write_all(meta.as_bytes())?; - fs::create_dir_all(self.root_folder.join(relative_data_path))?; - Ok(WriteableGraphFolder { - path: self.root_folder, - }) - } - - /// Prepare a graph folder for atomically swapping the data contents. - /// This returns an error if the folder is set to write as Zip. - /// - /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and - /// the contents of the corresponding folder are deleted. - pub fn init_swap(self) -> Result { - if self.write_as_zip_format { - return Err(GraphFolderError::ZippedGraphCannotBeSwapped); - } - let old_swap = match read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH) { - Ok(path) => path, - Err(_) => { - fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up - None - } - }; - - fs::create_dir_all(self.root())?; - - let swap_path = match old_swap { - Some(relative_path) => { - let swap_path = self.root_folder.join(relative_path); - if swap_path.exists() { - fs::remove_dir_all(&swap_path)?; - } - swap_path - } - None => { - let new_relative_data_path = - make_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - let new_data_path = self.root_folder.join(&new_relative_data_path); - let meta = serde_json::to_string(&RelativePath { - path: new_relative_data_path, - })?; - let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; - dirty_file.write_all(meta.as_bytes())?; - dirty_file.sync_all()?; - new_data_path - } - }; - fs::create_dir_all(swap_path)?; - Ok(WriteableGraphFolder { - path: 
self.root_folder, - }) - } - - /// Clears the folder of any contents. - pub fn clear(&self) -> Result<(), GraphFolderError> { - if self.is_zip() { - return Err(GraphFolderError::IOErrorMsg( - "Cannot clear a zip folder".to_string(), - )); - } - - fs::remove_dir_all(&self.root_folder)?; - fs::create_dir_all(&self.root_folder)?; - Ok(()) - } - - pub fn get_zip_graph_prefix(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) - } else { - let data_path = read_or_default_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - let graph_path = read_or_default_path_pointer( - &self.root().join(&data_path), - GRAPH_META_PATH, - GRAPH_PATH, - )?; - Ok([data_path, graph_path].join("/")) - } - } - - fn ensure_clean_root_dir(&self) -> Result<(), GraphFolderError> { - if self.root_folder.exists() { - let non_empty = self.root_folder.read_dir()?.next().is_some(); - if non_empty { - return Err(GraphFolderError::NonEmptyGraphFolder( - self.root_folder.clone(), - )); - } - } else { - fs::create_dir(&self.root_folder)? - } - - Ok(()) - } - - pub fn is_disk_graph(&self) -> Result { - let meta = self.read_metadata()?; - Ok(meta.is_diskgraph) - } - - /// Creates a zip file from the folder. 
- pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphFolderError> { - if self.is_zip() { - let mut reader = File::open(&self.root_folder)?; - io::copy(&mut reader, &mut writer)?; - } else { - let mut zip = ZipWriter::new(writer); - for entry in WalkDir::new(&self.root_folder) - .into_iter() - .filter_map(Result::ok) - { - let path = entry.path(); - let rel_path = path.strip_prefix(&self.root_folder).map_err(|e| { - GraphFolderError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) - })?; - - let zip_entry_name = rel_path - .components() - .map(|name| name.as_os_str().to_string_lossy()) - .join("/"); - - if path.is_file() { - zip.start_file::<_, ()>(zip_entry_name, FileOptions::default())?; - - let mut file = File::open(path)?; - std::io::copy(&mut file, &mut zip)?; - } else if path.is_dir() && !zip_entry_name.is_empty() { - // Add empty directories to the zip - zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; - } - } - - zip.finish()?; - } - Ok(()) - } - - pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphFolderError> { - self.ensure_clean_root_dir()?; - let mut archive = ZipArchive::new(reader)?; - archive.extract(self.root())?; - Ok(()) - } -} - -#[must_use] -#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] -pub struct WriteableGraphFolder { - path: PathBuf, -} - -impl GraphPaths for WriteableGraphFolder { - fn root(&self) -> &Path { - &self.path - } - - fn relative_data_path(&self) -> Result { - let path = read_path_pointer(self.root(), DIRTY_PATH, DATA_PATH)? 
- .ok_or(GraphFolderError::NoWriteInProgress)?; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - let path = - read_or_default_path_pointer(&self.data_path()?.as_ref(), GRAPH_META_PATH, GRAPH_PATH)?; - Ok(path) - } - - fn init(&self) -> Result<(), GraphFolderError> { - Ok(()) - } -} - -impl WriteableGraphFolder { - /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' - /// and cleaning up any old data if it exists. - /// - /// This operation returns an error if there is no write in progress. - pub fn finish(self) -> Result { - let old_data = read_path_pointer(self.root(), ROOT_META_PATH, DATA_PATH)?; - fs::rename( - self.root().join(DIRTY_PATH), - self.root().join(ROOT_META_PATH), - )?; - if let Some(old_data) = old_data { - let old_data_path = self.root().join(old_data); - if old_data_path.is_dir() { - fs::remove_dir_all(old_data_path)?; - } - } - Ok(GraphFolder { - root_folder: self.path, - write_as_zip_format: false, - }) - } -} - -#[derive(Clone, Debug)] -pub struct InnerGraphFolder { - path: PathBuf, -} - -impl AsRef for InnerGraphFolder { - fn as_ref(&self) -> &Path { - &self.path - } -} - -impl InnerGraphFolder { - pub fn write_metadata(&self, meta: Metadata) -> Result<(), GraphFolderError> { - meta.write_atomic(self.as_ref(), &self.meta_path())?; - Ok(()) - } - - pub fn read_metadata(&self) -> Result { - let mut json = String::new(); - let mut file = File::open(self.meta_path())?; - file.read_to_string(&mut json)?; - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - - /// Atomically point the metadata file at the graph data described by `meta`, removing the - /// previously-referenced graph directory if the path changed. - /// - /// NOTE: this does NOT encode the graph data itself. 
The caller must have already written - /// the graph data into the directory in `meta.path` (see the `replace_graph` in `raphtory`) - pub fn replace_graph_path(&self, meta: Metadata) -> Result<(), GraphFolderError> { - let old_relative_graph_path = self.relative_graph_path()?; - let path_changed = meta.path != old_relative_graph_path; - - self.write_metadata(meta)?; - if path_changed { - fs::remove_dir_all(self.as_ref().join(&old_relative_graph_path))?; - } - Ok(()) - } - pub fn vectors_path(&self) -> PathBuf { - self.path.join(VECTORS_PATH) - } - - pub fn index_path(&self) -> PathBuf { - self.path.join(INDEX_PATH) - } - - pub fn meta_path(&self) -> PathBuf { - self.path.join(GRAPH_META_PATH) - } - - pub fn relative_graph_path(&self) -> Result { - let relative = read_or_default_path_pointer(&self.path, GRAPH_META_PATH, GRAPH_PATH)?; - Ok(relative) - } - - pub fn graph_path(&self) -> Result { - Ok(self.path.join(self.relative_graph_path()?)) - } - - fn ensure_clean_root_dir(&self) -> Result<(), GraphFolderError> { - if self.as_ref().exists() { - let non_empty = self.as_ref().read_dir()?.next().is_some(); - if non_empty { - return Err(GraphFolderError::NonEmptyGraphFolder( - self.as_ref().to_path_buf(), - )); - } - } else { - fs::create_dir_all(self)? - } - Ok(()) - } - - /// Extracts a zip file to the folder. 
- pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphFolderError> { - self.ensure_clean_root_dir()?; - - let mut zip = ZipArchive::new(reader)?; - let data_dir = get_zip_data_path(&mut zip)?; - - for i in 0..zip.len() { - let mut file = zip.by_index(i)?; - let zip_entry_name = match file.enclosed_name() { - Some(name) => name, - None => continue, - }; - if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { - let out_path = self.as_ref().join(inner_path); - if file.is_dir() { - std::fs::create_dir_all(&out_path)?; - } else { - // Create any parent directories - if let Some(parent) = out_path.parent() { - std::fs::create_dir_all(parent)?; - } - - let mut out_file = std::fs::File::create(&out_path)?; - std::io::copy(&mut file, &mut out_file)?; - } - } - } - - Ok(()) - } -} - -impl> From

for GraphFolder { - fn from(value: P) -> Self { - let path: &Path = value.as_ref(); - Self { - root_folder: path.to_path_buf(), - write_as_zip_format: false, - } - } -} - -impl From<&GraphFolder> for GraphFolder { - fn from(value: &GraphFolder) -> Self { - value.clone() - } -} +use std::io::{Read, Seek, Write}; #[cfg(test)] mod tests { @@ -625,6 +8,7 @@ mod tests { use crate::{ db::graph::graph::assert_graph_equal, prelude::*, serialise::serialise::StableDecode, }; + use raphtory_api::core::storage::graph_folder::GraphPaths; // /// Verify that the metadata is re-created if it does not exist. // #[test] diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 464d1d6098..2bbc635291 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -2,10 +2,10 @@ use crate::{ db::api::view::internal::GraphView, errors::GraphError, prelude::{GraphViewOps, ParquetEncoder}, - serialise::{make_path_pointer, GraphFolder, GraphPaths, InnerGraphFolder}, }; use raphtory_api::core::storage::graph_folder::{ - GraphMetadata, Metadata, GRAPH_META_PATH, GRAPH_PATH, + make_path_pointer, GraphFolder, GraphMetadata, GraphPaths, InnerGraphFolder, Metadata, + GRAPH_META_PATH, GRAPH_PATH, }; /// Build the [`GraphMetadata`] summary for a graph diff --git a/raphtory/src/serialise/parquet.rs b/raphtory/src/serialise/parquet.rs index d979913748..c438798f97 100644 --- a/raphtory/src/serialise/parquet.rs +++ b/raphtory/src/serialise/parquet.rs @@ -17,7 +17,6 @@ use crate::{ SECONDARY_INDEX_COL, SRC_GID_COL, SRC_VID_COL, TIME_COL, TYPE_COL, TYPE_ID_COL, }, prelude::*, - serialise::GraphPaths, }; use arrow::{array::RecordBatch, datatypes::SchemaRef}; use itertools::Itertools; @@ -26,7 +25,12 @@ use parquet::{ basic::Compression, file::{metadata::KeyValue, properties::WriterProperties}, }; -use raphtory_api::{core::entities::properties::prop::prop_col::lift_property_col, GraphType}; +use raphtory_api::{ + core::{ + 
entities::properties::prop::prop_col::lift_property_col, storage::graph_folder::GraphPaths, + }, + GraphType, +}; use raphtory_storage::core_ops::CoreGraphOps; use std::{ fs::File, diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index cc99e1635f..964d145e62 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -6,14 +6,13 @@ use crate::{ }, errors::GraphError, serialise::{ - get_zip_graph_path, metadata::build_graph_metadata, parquet::{ParquetDecoder, ParquetEncoder}, - GraphFolder, GraphPaths, RelativePath, }, }; use raphtory_api::core::storage::graph_folder::{ - Metadata, DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH, GRAPH_META_PATH, ROOT_META_PATH, + get_zip_graph_path, GraphFolder, GraphPaths, Metadata, RelativePath, DEFAULT_DATA_PATH, + DEFAULT_GRAPH_PATH, GRAPH_META_PATH, ROOT_META_PATH, }; use std::{ fs::File, From 37c855c3e522b2eb9ac351a820fab1e125457626 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 26 Jun 2026 04:15:22 -0400 Subject: [PATCH 17/20] Move tests to raphtory-tests and get rid of old graph_folder.rs file --- raphtory-tests/tests/test_graph_folder.rs | 220 +++++++++++++++++ .../types/macros/trait_impl/serialise.rs | 3 +- raphtory/src/serialise/graph_folder.rs | 230 ------------------ raphtory/src/serialise/mod.rs | 2 - 4 files changed, 222 insertions(+), 233 deletions(-) create mode 100644 raphtory-tests/tests/test_graph_folder.rs delete mode 100644 raphtory/src/serialise/graph_folder.rs diff --git a/raphtory-tests/tests/test_graph_folder.rs b/raphtory-tests/tests/test_graph_folder.rs new file mode 100644 index 0000000000..ff03013a8c --- /dev/null +++ b/raphtory-tests/tests/test_graph_folder.rs @@ -0,0 +1,220 @@ +use raphtory::{db::graph::graph::assert_graph_equal, prelude::*, serialise::StableDecode}; +use raphtory_api::core::storage::graph_folder::{GraphFolder, GraphPaths}; + +// /// Verify that the metadata is re-created if it does not exist. 
+// #[test] +// #[ignore = "Need to think about how to deal with reading old format"] +// fn test_read_metadata_from_noninitialized_zip() { +// global_info_logger(); +// +// let graph = Graph::new(); +// graph.add_node(0, 0, NO_PROPS, None).unwrap(); +// +// let tmp_dir = tempfile::TempDir::new().unwrap(); +// let zip_path = tmp_dir.path().join("graph.zip"); +// let folder = GraphFolder::new_as_zip(&zip_path); +// graph.encode(&folder).unwrap(); +// +// // Remove the metadata file from the zip to simulate a noninitialized zip +// remove_metadata_from_zip(&zip_path); +// +// // Should fail because the metadata file is not present +// let err = folder.try_read_metadata(); +// assert!(err.is_err()); +// +// // Should re-create the metadata file +// let result = folder.read_metadata().unwrap(); +// assert_eq!( +// result, +// GraphMetadata { +// node_count: 1, +// edge_count: 0, +// metadata: vec![], +// graph_type: GraphType::EventGraph, +// is_diskgraph: false +// } +// ); +// } + +// /// Helper function to remove the metadata file from a zip +// fn remove_metadata_from_zip(zip_path: &Path) { +// let mut zip_file = std::fs::File::open(&zip_path).unwrap(); +// let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); +// let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); +// +// // Scope for the zip writer +// { +// let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); +// +// for i in 0..zip_archive.len() { +// let mut file = zip_archive.by_index(i).unwrap(); +// +// // Copy all files except the metadata file +// if file.name() != META_PATH { +// zip_writer +// .start_file::<_, ()>(file.name(), FileOptions::default()) +// .unwrap(); +// std::io::copy(&mut file, &mut zip_writer).unwrap(); +// } +// } +// +// zip_writer.finish().unwrap(); +// } +// +// std::fs::copy(temp_zip.path(), &zip_path).unwrap(); +// } + +// /// Verify that the metadata is re-created if it does not exist. 
+// #[test] +// #[ignore = "Need to think about how to handle reading from old format"] +// fn test_read_metadata_from_noninitialized_folder() { +// global_info_logger(); +// +// let graph = Graph::new(); +// graph.add_node(0, 0, NO_PROPS, None).unwrap(); +// +// let temp_folder = tempfile::TempDir::new().unwrap(); +// let folder = GraphFolder::from(temp_folder.path()); +// graph.encode(&folder).unwrap(); +// +// // Remove the metadata file +// std::fs::remove_file(folder.get_meta_path()).unwrap(); +// +// // Should fail because the metadata file is not present +// let err = folder.try_read_metadata(); +// assert!(err.is_err()); +// +// // Should re-create the metadata file +// let result = folder.read_metadata().unwrap(); +// assert_eq!( +// result, +// GraphMetadata { +// node_count: 1, +// edge_count: 0, +// metadata: vec![], +// graph_type: GraphType::EventGraph, +// is_diskgraph: false +// } +// ); +// } +#[test] +fn test_zip_from_folder() { + let graph = Graph::new(); + graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); + graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); + graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); + + // Create a regular folder and encode the graph + let temp_folder = tempfile::TempDir::new().unwrap(); + let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); + graph.encode(&initial_folder).unwrap(); + + assert!(initial_folder.graph_path().unwrap().exists()); + assert!(initial_folder.meta_path().unwrap().exists()); + + // Create a zip file from the folder + let output_zip_path = temp_folder.path().join("output.zip"); + let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); + initial_folder.zip_from_folder(output_zip_file).unwrap(); + + assert!(output_zip_path.exists()); + + // Verify the output zip contains the same graph + let zip_folder = GraphFolder::new_as_zip(&output_zip_path); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); + + assert_graph_equal(&graph, &decoded_graph); +} + 
+#[test] +fn test_zip_from_zip() { + let graph = Graph::new(); + graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); + graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); + graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); + + // Create an initial zip file + let temp_folder = tempfile::TempDir::new().unwrap(); + let initial_zip_path = temp_folder.path().join("initial.zip"); + let initial_folder = GraphFolder::new_as_zip(&initial_zip_path); + graph.encode(&initial_folder).unwrap(); + + assert!(initial_zip_path.exists()); + + // Create a new zip file from the existing zip + let output_zip_path = temp_folder.path().join("output.zip"); + let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); + initial_folder.zip_from_folder(output_zip_file).unwrap(); + + assert!(output_zip_path.exists()); + + // Verify zip file sizes + let initial_size = std::fs::metadata(&initial_zip_path).unwrap().len(); + let output_size = std::fs::metadata(&output_zip_path).unwrap().len(); + assert_eq!(initial_size, output_size); + + // Verify the output zip contains the same graph + let zip_folder = GraphFolder::new_as_zip(&output_zip_path); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); + + assert_graph_equal(&graph, &decoded_graph); +} + +#[test] +fn test_unzip_to_folder() { + let graph = Graph::new(); + + graph + .add_edge(0, 0, 1, [("test prop 1", Prop::map(NO_PROPS))], None) + .unwrap(); + graph + .add_edge( + 1, + 2, + 3, + [("test prop 1", Prop::map([("key", "value")]))], + Some("layer_a"), + ) + .unwrap(); + graph + .add_edge(2, 3, 4, [("test prop 2", "value")], Some("layer_b")) + .unwrap(); + graph + .add_edge(3, 1, 4, [("test prop 3", 10.0)], None) + .unwrap(); + graph + .add_edge(4, 1, 3, [("test prop 4", true)], None) + .unwrap(); + + graph + .node(1) + .unwrap() + .add_updates(5, [("test node prop", 5i32)], None) + .unwrap(); + + let temp_folder = tempfile::TempDir::new().unwrap(); + let folder = temp_folder.path().join("graph"); + let graph_folder = 
GraphFolder::from(&folder); + + graph.encode(&graph_folder).unwrap(); + assert!(graph_folder.graph_path().unwrap().exists()); + + // Zip the folder + let mut zip_bytes = Vec::new(); + let cursor = std::io::Cursor::new(&mut zip_bytes); + graph_folder.zip_from_folder(cursor).unwrap(); + + // Unzip to a new folder + let folder = temp_folder.path().join("unzip"); + let unzip_folder = GraphFolder::from(&folder); + let cursor = std::io::Cursor::new(&zip_bytes); + unzip_folder.unzip_to_folder(cursor).unwrap(); + + // Verify the extracted folder has the same structure + assert!(unzip_folder.graph_path().unwrap().exists()); + assert!(unzip_folder.meta_path().unwrap().exists()); + + // Verify the extracted graph is the same as the original + let extracted_graph = Graph::decode(&unzip_folder).unwrap(); + assert_graph_equal(&graph, &extracted_graph); +} diff --git a/raphtory/src/python/types/macros/trait_impl/serialise.rs b/raphtory/src/python/types/macros/trait_impl/serialise.rs index c856b11073..41bbd48ef0 100644 --- a/raphtory/src/python/types/macros/trait_impl/serialise.rs +++ b/raphtory/src/python/types/macros/trait_impl/serialise.rs @@ -39,7 +39,8 @@ macro_rules! 
impl_serialise { /// Returns: /// None: fn save_to_zip(&self, path: PathBuf) -> Result<(), GraphError> { - let folder = $crate::api::core::storage::graph_folder::GraphFolder::new_as_zip(path); + let folder = + $crate::api::core::storage::graph_folder::GraphFolder::new_as_zip(path); $crate::serialise::StableEncode::encode(&self.$field, folder) } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs deleted file mode 100644 index 8cfa9ef4c1..0000000000 --- a/raphtory/src/serialise/graph_folder.rs +++ /dev/null @@ -1,230 +0,0 @@ -use itertools::Itertools; -use serde::{Deserialize, Serialize}; -use std::io::{Read, Seek, Write}; - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - db::graph::graph::assert_graph_equal, prelude::*, serialise::serialise::StableDecode, - }; - use raphtory_api::core::storage::graph_folder::GraphPaths; - - // /// Verify that the metadata is re-created if it does not exist. - // #[test] - // #[ignore = "Need to think about how to deal with reading old format"] - // fn test_read_metadata_from_noninitialized_zip() { - // global_info_logger(); - // - // let graph = Graph::new(); - // graph.add_node(0, 0, NO_PROPS, None).unwrap(); - // - // let tmp_dir = tempfile::TempDir::new().unwrap(); - // let zip_path = tmp_dir.path().join("graph.zip"); - // let folder = GraphFolder::new_as_zip(&zip_path); - // graph.encode(&folder).unwrap(); - // - // // Remove the metadata file from the zip to simulate a noninitialized zip - // remove_metadata_from_zip(&zip_path); - // - // // Should fail because the metadata file is not present - // let err = folder.try_read_metadata(); - // assert!(err.is_err()); - // - // // Should re-create the metadata file - // let result = folder.read_metadata().unwrap(); - // assert_eq!( - // result, - // GraphMetadata { - // node_count: 1, - // edge_count: 0, - // metadata: vec![], - // graph_type: GraphType::EventGraph, - // is_diskgraph: false - // } - // ); - // } - - // /// Helper 
function to remove the metadata file from a zip - // fn remove_metadata_from_zip(zip_path: &Path) { - // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); - // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); - // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); - // - // // Scope for the zip writer - // { - // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); - // - // for i in 0..zip_archive.len() { - // let mut file = zip_archive.by_index(i).unwrap(); - // - // // Copy all files except the metadata file - // if file.name() != META_PATH { - // zip_writer - // .start_file::<_, ()>(file.name(), FileOptions::default()) - // .unwrap(); - // std::io::copy(&mut file, &mut zip_writer).unwrap(); - // } - // } - // - // zip_writer.finish().unwrap(); - // } - // - // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); - // } - - // /// Verify that the metadata is re-created if it does not exist. - // #[test] - // #[ignore = "Need to think about how to handle reading from old format"] - // fn test_read_metadata_from_noninitialized_folder() { - // global_info_logger(); - // - // let graph = Graph::new(); - // graph.add_node(0, 0, NO_PROPS, None).unwrap(); - // - // let temp_folder = tempfile::TempDir::new().unwrap(); - // let folder = GraphFolder::from(temp_folder.path()); - // graph.encode(&folder).unwrap(); - // - // // Remove the metadata file - // std::fs::remove_file(folder.get_meta_path()).unwrap(); - // - // // Should fail because the metadata file is not present - // let err = folder.try_read_metadata(); - // assert!(err.is_err()); - // - // // Should re-create the metadata file - // let result = folder.read_metadata().unwrap(); - // assert_eq!( - // result, - // GraphMetadata { - // node_count: 1, - // edge_count: 0, - // metadata: vec![], - // graph_type: GraphType::EventGraph, - // is_diskgraph: false - // } - // ); - // } - #[test] - fn test_zip_from_folder() { - let graph = Graph::new(); - graph.add_node(0, 0, 
NO_PROPS, None, None).unwrap(); - graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); - graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); - - // Create a regular folder and encode the graph - let temp_folder = tempfile::TempDir::new().unwrap(); - let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); - graph.encode(&initial_folder).unwrap(); - - assert!(initial_folder.graph_path().unwrap().exists()); - assert!(initial_folder.meta_path().unwrap().exists()); - - // Create a zip file from the folder - let output_zip_path = temp_folder.path().join("output.zip"); - let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); - initial_folder.zip_from_folder(output_zip_file).unwrap(); - - assert!(output_zip_path.exists()); - - // Verify the output zip contains the same graph - let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder).unwrap(); - - assert_graph_equal(&graph, &decoded_graph); - } - - #[test] - fn test_zip_from_zip() { - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None, None).unwrap(); - graph.add_node(1, 1, NO_PROPS, None, None).unwrap(); - graph.add_edge(0, 0, 1, NO_PROPS, None).unwrap(); - - // Create an initial zip file - let temp_folder = tempfile::TempDir::new().unwrap(); - let initial_zip_path = temp_folder.path().join("initial.zip"); - let initial_folder = GraphFolder::new_as_zip(&initial_zip_path); - graph.encode(&initial_folder).unwrap(); - - assert!(initial_zip_path.exists()); - - // Create a new zip file from the existing zip - let output_zip_path = temp_folder.path().join("output.zip"); - let output_zip_file = std::fs::File::create(&output_zip_path).unwrap(); - initial_folder.zip_from_folder(output_zip_file).unwrap(); - - assert!(output_zip_path.exists()); - - // Verify zip file sizes - let initial_size = std::fs::metadata(&initial_zip_path).unwrap().len(); - let output_size = std::fs::metadata(&output_zip_path).unwrap().len(); - 
assert_eq!(initial_size, output_size); - - // Verify the output zip contains the same graph - let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder).unwrap(); - - assert_graph_equal(&graph, &decoded_graph); - } - - #[test] - fn test_unzip_to_folder() { - let graph = Graph::new(); - - graph - .add_edge(0, 0, 1, [("test prop 1", Prop::map(NO_PROPS))], None) - .unwrap(); - graph - .add_edge( - 1, - 2, - 3, - [("test prop 1", Prop::map([("key", "value")]))], - Some("layer_a"), - ) - .unwrap(); - graph - .add_edge(2, 3, 4, [("test prop 2", "value")], Some("layer_b")) - .unwrap(); - graph - .add_edge(3, 1, 4, [("test prop 3", 10.0)], None) - .unwrap(); - graph - .add_edge(4, 1, 3, [("test prop 4", true)], None) - .unwrap(); - - graph - .node(1) - .unwrap() - .add_updates(5, [("test node prop", 5i32)], None) - .unwrap(); - - let temp_folder = tempfile::TempDir::new().unwrap(); - let folder = temp_folder.path().join("graph"); - let graph_folder = GraphFolder::from(&folder); - - graph.encode(&graph_folder).unwrap(); - assert!(graph_folder.graph_path().unwrap().exists()); - - // Zip the folder - let mut zip_bytes = Vec::new(); - let cursor = std::io::Cursor::new(&mut zip_bytes); - graph_folder.zip_from_folder(cursor).unwrap(); - - // Unzip to a new folder - let folder = temp_folder.path().join("unzip"); - let unzip_folder = GraphFolder::from(&folder); - let cursor = std::io::Cursor::new(&zip_bytes); - unzip_folder.unzip_to_folder(cursor).unwrap(); - - // Verify the extracted folder has the same structure - assert!(unzip_folder.graph_path().unwrap().exists()); - assert!(unzip_folder.meta_path().unwrap().exists()); - - // Verify the extracted graph is the same as the original - let extracted_graph = Graph::decode(&unzip_folder).unwrap(); - assert_graph_equal(&graph, &extracted_graph); - } -} diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index 7b49f01200..93185d3328 100644 --- 
a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,9 +1,7 @@ -mod graph_folder; pub mod metadata; pub mod parquet; mod serialise; -pub use graph_folder::*; pub use serialise::{StableDecode, StableEncode}; From f7535c20926dbdd551ea4845ab4f680637dc8493 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 26 Jun 2026 05:18:35 -0400 Subject: [PATCH 18/20] Change tmp file to NamedTempFile to keep fix from commit f67d5617 --- Cargo.lock | 1 + raphtory-api/Cargo.toml | 3 ++- raphtory-api/src/core/storage/graph_folder.rs | 10 ++++++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88d0e7711f..d477fd3479 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6442,6 +6442,7 @@ dependencies = [ "serde", "serde_arrow", "serde_json", + "tempfile", "thiserror 2.0.18", "tracing", "tracing-subscriber", diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 253c669963..5973f791f7 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -19,6 +19,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, optional = true } zip = { workspace = true, optional = true } walkdir = { workspace = true, optional = true } +tempfile = { workspace = true, optional = true } bigdecimal = { workspace = true, features = ["string-only"] } thiserror = { workspace = true } bytemuck = { workspace = true } @@ -61,4 +62,4 @@ python = [ vectors = [] template = ["dep:minijinja"] search = [] -io = ["dep:serde_json", "dep:zip", "dep:walkdir"] +io = ["dep:serde_json", "dep:zip", "dep:walkdir", "dep:tempfile"] diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index ef00cd66c7..bd0412c5a1 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -23,6 +23,8 @@ use std::{ io::{ErrorKind, Read, Seek, Write}, }; #[cfg(feature = "io")] +use tempfile::NamedTempFile; +#[cfg(feature = 
"io")] use walkdir::WalkDir; #[cfg(feature = "io")] use zip::{write::FileOptions, ZipArchive, ZipWriter}; @@ -62,10 +64,10 @@ pub struct GraphMetadata { impl Metadata { /// Atomically write this metadata into the data folder at `data_path` pub fn write_atomic(&self, data_path: &Path, meta_path: &Path) -> std::io::Result<()> { - let tmp_path = data_path.join(".tmp"); - let tmp_file = File::create(&tmp_path)?; - serde_json::to_writer(tmp_file, self).map_err(std::io::Error::other)?; - fs::rename(tmp_path, meta_path)?; + let mut tmp_file = NamedTempFile::new_in(data_path)?; + serde_json::to_writer(&mut tmp_file, self).map_err(std::io::Error::other)?; + tmp_file.as_file().sync_all()?; + tmp_file.persist(meta_path).map_err(io::Error::from)?; Ok(()) } } From 5998f9e810531eeba38fda9ed08e4e335dcc7ad4 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 26 Jun 2026 11:16:40 -0400 Subject: [PATCH 19/20] Cleanup after merge. Add SystemTimeError to GraphFolderError --- raphtory-api/src/core/storage/graph_folder.rs | 20 +++++++++++++++---- .../src/model/graph/meta_graph.rs | 4 ++-- raphtory-graphql/src/paths.rs | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index ead727b1aa..d38cfa8a78 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -10,7 +10,7 @@ //! ├── index/ # Search indexes (optional) //! 
└── vectors/ # Vector embeddings (optional) -use crate::{core::input::input_node::parse_u64_strict, GraphType, to_millis::ToMillis}; +use crate::{core::input::input_node::parse_u64_strict, to_millis::ToMillis, GraphType}; use itertools::Itertools; use serde::{Deserialize, Serialize}; #[cfg(feature = "io")] @@ -21,6 +21,7 @@ use std::{ use std::{ io, io::{ErrorKind, Read, Seek, Write}, + time::SystemTimeError, }; #[cfg(feature = "io")] use tempfile::NamedTempFile; @@ -78,22 +79,33 @@ impl Metadata { pub enum GraphFolderError { #[error(transparent)] Io(#[from] std::io::Error), + #[error(transparent)] Serde(#[from] serde_json::Error), + #[error("zip operation failed: {0}")] Zip(#[from] zip::result::ZipError), + #[error("Path {0} is not a valid relative data path")] InvalidRelativePath(String), + #[error("Not a zip archive")] NotAZip, + #[error("Cannot write graph into non empty folder {0}")] NonEmptyGraphFolder(PathBuf), + #[error("Graph folder is not initialised for writing")] NoWriteInProgress, + #[error("Cannot swap zipped graph data")] ZippedGraphCannotBeSwapped, + #[error("IO operation failed: {0}")] IOErrorMsg(String), + + #[error("System time error: {0}")] + SystemTimeError(#[from] SystemTimeError), } pub fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphFolderError> { @@ -322,15 +334,15 @@ pub trait GraphPaths { Ok(()) } - fn created(&self) -> Result { + fn created(&self) -> Result { Ok(self.root_meta_path().metadata()?.created()?.to_millis()?) } - fn last_updated(&self) -> Result { + fn last_updated(&self) -> Result { Ok(fs::metadata(self.meta_path()?)?.modified()?.to_millis()?) } - fn last_opened(&self) -> Result { + fn last_opened(&self) -> Result { Ok(fs::metadata(self.meta_path()?)?.accessed()?.to_millis()?) 
} } diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 3127b781c8..11694ad7b1 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -10,7 +10,7 @@ use raphtory::{ db::api::storage::storage::read_constant_graph_properties, errors::GraphError, prelude::{GraphViewOps, PropertiesOps}, - serialise::parquet::decode_graph_metadata, + serialise::{metadata::build_graph_metadata, parquet::decode_graph_metadata}, }; use raphtory_api::core::storage::graph_folder::{GraphMetadata, GraphPaths}; use std::{cmp::Ordering, path::PathBuf, sync::Arc}; @@ -64,7 +64,7 @@ impl MetaGraph { .get_or_try_init(|| async { match data.get_cached_graph(self.folder.local_path()).await { None => self.folder.read_metadata_async().await, - Some(graph) => Ok(GraphMetadata::from_graph(graph)), + Some(graph) => Ok(build_graph_metadata(graph)), } }) .await?) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index bd5d43c2e0..b551bff7f1 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,5 +1,5 @@ use crate::{ - data::{WorkDirGuard, WorkDirWriteGuard, DIRTY_PATH}, + data::{WorkDirGuard, WorkDirWriteGuard}, model::blocking_io, rayon::blocking_compute, }; From 66ec85ee1b2dcb2806a02f66b7d064759babc4b6 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 29 Jun 2026 04:49:50 -0400 Subject: [PATCH 20/20] Get rid of "io" feature in raphtory-api --- db4-storage/Cargo.toml | 2 +- raphtory-api/Cargo.toml | 9 ++++----- raphtory-api/src/core/entities/properties/prop/mod.rs | 1 - raphtory-api/src/core/storage/graph_folder.rs | 10 +--------- raphtory/Cargo.toml | 2 +- raphtory/src/serialise/metadata.rs | 3 --- 6 files changed, 7 insertions(+), 20 deletions(-) diff --git a/db4-storage/Cargo.toml b/db4-storage/Cargo.toml index 12194afab2..ca8874780b 100644 --- a/db4-storage/Cargo.toml +++ b/db4-storage/Cargo.toml @@ -11,7 +11,7 @@ 
rust-version.workspace = true edition = "2024" [dependencies] -raphtory-api = { workspace = true, features = ["io"] } +raphtory-api.workspace = true raphtory-api-macros.workspace = true raphtory-core = { workspace = true } raphtory-itertools.workspace = true diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 5973f791f7..1a4c6fdaea 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -16,10 +16,10 @@ edition.workspace = true [dependencies] serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true, optional = true } -zip = { workspace = true, optional = true } -walkdir = { workspace = true, optional = true } -tempfile = { workspace = true, optional = true } +serde_json = { workspace = true } +zip = { workspace = true } +walkdir = { workspace = true } +tempfile = { workspace = true } bigdecimal = { workspace = true, features = ["string-only"] } thiserror = { workspace = true } bytemuck = { workspace = true } @@ -62,4 +62,3 @@ python = [ vectors = [] template = ["dep:minijinja"] search = [] -io = ["dep:serde_json", "dep:zip", "dep:walkdir", "dep:tempfile"] diff --git a/raphtory-api/src/core/entities/properties/prop/mod.rs b/raphtory-api/src/core/entities/properties/prop/mod.rs index 4f563cdf57..150c9f1844 100644 --- a/raphtory-api/src/core/entities/properties/prop/mod.rs +++ b/raphtory-api/src/core/entities/properties/prop/mod.rs @@ -5,7 +5,6 @@ mod prop_enum; mod prop_ref_enum; mod prop_type; mod prop_unwrap; -#[cfg(feature = "io")] mod serde; #[cfg(feature = "template")] diff --git a/raphtory-api/src/core/storage/graph_folder.rs b/raphtory-api/src/core/storage/graph_folder.rs index d38cfa8a78..4a7cdf7c5c 100644 --- a/raphtory-api/src/core/storage/graph_folder.rs +++ b/raphtory-api/src/core/storage/graph_folder.rs @@ -13,21 +13,15 @@ use crate::{core::input::input_node::parse_u64_strict, to_millis::ToMillis, GraphType}; use itertools::Itertools; use serde::{Deserialize, Serialize}; -#[cfg(feature = "io")] 
use std::{ fs::{self, File}, - path::{Path, PathBuf}, -}; -use std::{ io, io::{ErrorKind, Read, Seek, Write}, + path::{Path, PathBuf}, time::SystemTimeError, }; -#[cfg(feature = "io")] use tempfile::NamedTempFile; -#[cfg(feature = "io")] use walkdir::WalkDir; -#[cfg(feature = "io")] use zip::{write::FileOptions, ZipArchive, ZipWriter}; /// Metadata file that stores path to the data folder. @@ -61,7 +55,6 @@ pub struct GraphMetadata { pub is_diskgraph: bool, } -#[cfg(feature = "io")] impl Metadata { /// Atomically write this metadata into the data folder at `data_path` pub fn write_atomic(&self, data_path: &Path, meta_path: &Path) -> std::io::Result<()> { @@ -74,7 +67,6 @@ impl Metadata { } /// Errors returned by the graph folder path operations (the `GraphPaths` trait and its helpers) -#[cfg(feature = "io")] #[derive(thiserror::Error, Debug)] pub enum GraphFolderError { #[error(transparent)] diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 9214d462fb..ddc0c753bf 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -15,7 +15,7 @@ homepage.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -raphtory-api = { workspace = true, features = ["io"] } +raphtory-api.workspace = true raphtory-core.workspace = true raphtory-storage.workspace = true raphtory-itertools.workspace = true diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 2bbc635291..aa4d437064 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -20,9 +20,6 @@ pub fn build_graph_metadata(graph: impl GraphView) -> GraphMetadata { /// Encode `graph`'s data into a fresh directory inside `folder` and atomically point the folder's /// metadata at it, deleting any previously-stored graph data. 
-/// -/// The graph-data encoding lives in `raphtory` because the `ParquetEncoder` / -/// `GraphView` traits aren't available in `raphtory-api` pub fn replace_graph_in_folder( folder: &InnerGraphFolder, graph: impl ParquetEncoder + GraphView + std::fmt::Debug,