/// Build-script entry point: emits the linker directives for GraphBLAS and
/// LAGraph and, when the `regenerate-bindings` feature is enabled, regenerates
/// the FFI bindings.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not present in the environment.
fn main() {
    // Anchor the vendored LAGraph build directories to the package root.
    // A relative `-L` path is resolved against the compiler's working
    // directory, which is not guaranteed to be this package's directory
    // (for example when the crate is built as part of a larger workspace).
    let manifest_dir = std::path::PathBuf::from(
        std::env::var_os("CARGO_MANIFEST_DIR")
            .expect("CARGO_MANIFEST_DIR must be set when running a build script"),
    );
    let lagraph_build = manifest_dir.join("deps").join("LAGraph").join("build");

    println!("cargo:rustc-link-lib=dylib=graphblas");
    println!("cargo:rustc-link-search=native=/usr/local/lib");
    println!("cargo:rustc-link-lib=dylib=lagraph");
    println!(
        "cargo:rustc-link-search=native={}",
        lagraph_build.join("src").display()
    );
    println!("cargo:rustc-link-lib=dylib=lagraphx");
    println!(
        "cargo:rustc-link-search=native={}",
        lagraph_build.join("experimental").display()
    );

    // ---- Bindgen (only with `regenerate-bindings` feature) ----
    #[cfg(feature = "regenerate-bindings")]
    regenerate_bindings();

    println!("cargo:rerun-if-changed=build.rs");
}
+ ) + }); + + let bindings = bindgen::Builder::default() + .header( + lagraph_include + .join("LAGraph.h") + .to_str() + .expect("non-utf8 header path"), + ) + .header( + lagraph_include + .join("LAGraphX.h") + .to_str() + .expect("non-utf8 header path"), + ) + .clang_arg(format!("-I{}", graphblas_include.display())) + .clang_arg(format!("-I{}", lagraph_include.display())) + .allowlist_type("GrB_Index") + .allowlist_type("GrB_Matrix") + .allowlist_type("GrB_Vector") + .allowlist_item("GrB_BOOL") + .allowlist_item("GrB_LOR") + .allowlist_item("GrB_LOR_LAND_SEMIRING_BOOL") + .allowlist_item("GrB_Info") + .allowlist_function("GrB_Matrix_new") + .allowlist_function("GrB_Matrix_nvals") + .allowlist_function("GrB_Matrix_free") + .allowlist_function("GrB_Matrix_build_BOOL") + .allowlist_function("GrB_Vector_new") + .allowlist_function("GrB_Vector_free") + .allowlist_function("GrB_Vector_setElement_BOOL") + .allowlist_function("GrB_Vector_nvals") + .allowlist_function("GrB_Vector_extractTuples_BOOL") + .allowlist_function("GrB_vxm") + .allowlist_type("LAGraph_Graph") + .allowlist_type("LAGraph_Kind") + .allowlist_function("LAGraph_Init") + .allowlist_function("LAGraph_Finalize") + .allowlist_function("LAGraph_New") + .allowlist_function("LAGraph_Delete") + .allowlist_function("LAGraph_Cached_AT") + .allowlist_function("LAGraph_MMRead") + .default_enum_style(bindgen::EnumVariation::Rust { + non_exhaustive: false, + }) + .derive_debug(true) + .derive_copy(true) + .layout_tests(false) + // Suppress C-language doc comments so rustdoc does not attempt to + // compile them as Rust doctests. 
/// Selects a CSV column either by position or by header name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnSpec {
    /// Zero-based position of the column within each record.
    Index(usize),
    /// Text of the column's header cell; only resolvable when the input has
    /// a header row.
    Name(String),
}

/// Describes how edge fields are laid out in a CSV input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvConfig {
    /// Column holding the edge's source node.
    pub source_column: ColumnSpec,
    /// Column holding the edge's target node.
    pub target_column: ColumnSpec,
    /// Column holding the edge's label.
    pub label_column: ColumnSpec,
    /// Whether the first record is a header row rather than data.
    pub has_header: bool,
    /// Field delimiter byte.
    pub delimiter: u8,
}

impl Default for CsvConfig {
    /// Comma-separated input with a header row, reading source, target and
    /// label from the first, second and third columns respectively.
    fn default() -> Self {
        Self {
            source_column: ColumnSpec::Index(0),
            target_column: ColumnSpec::Index(1),
            label_column: ColumnSpec::Index(2),
            has_header: true,
            delimiter: b',',
        }
    }
}
+/// +/// # Example +/// +/// ```no_run +/// use pathrex::formats::csv::Csv; +/// use std::fs::File; +/// +/// let file = File::open("edges.csv").unwrap(); +/// let iter = Csv::from_reader(file).unwrap(); +/// for result in iter { +/// let edge = result.unwrap(); +/// println!("{} --{}--> {}", edge.source, edge.label, edge.target); +/// } +/// ``` +pub struct Csv { + records: csv::StringRecordsIntoIter, + source_idx: usize, + target_idx: usize, + label_idx: usize, +} + +impl Csv { + pub fn new(reader: R, config: CsvConfig) -> Result { + let mut csv_reader = csv::ReaderBuilder::new() + .has_headers(config.has_header) + .delimiter(config.delimiter) + .from_reader(reader); + + let (source_idx, target_idx, label_idx) = if config.has_header { + let headers = csv_reader.headers()?.clone(); + let resolve = |spec: &ColumnSpec| -> Result { + match spec { + ColumnSpec::Index(i) => Ok(*i), + ColumnSpec::Name(name) => headers + .iter() + .position(|h| h == name) + .ok_or_else(|| FormatError::MissingColumn { name: name.clone() }), + } + }; + ( + resolve(&config.source_column)?, + resolve(&config.target_column)?, + resolve(&config.label_column)?, + ) + } else { + let index_only = |spec: &ColumnSpec| -> Result { + match spec { + ColumnSpec::Index(i) => Ok(*i), + ColumnSpec::Name(name) => { + Err(FormatError::MissingColumn { name: name.clone() }) + } + } + }; + ( + index_only(&config.source_column)?, + index_only(&config.target_column)?, + index_only(&config.label_column)?, + ) + }; + + Ok(Self { + records: csv_reader.into_records(), + source_idx, + target_idx, + label_idx, + }) + } + + pub fn from_reader(reader: R) -> Result { + Self::new(reader, CsvConfig::default()) + } + + fn get_field(record: &StringRecord, idx: usize) -> Result { + record + .get(idx) + .map(str::to_owned) + .ok_or_else(|| FormatError::MissingColumn { + name: format!("index {idx}"), + }) + } +} + +impl Iterator for Csv { + type Item = Result; + + fn next(&mut self) -> Option { + let record = match 
self.records.next()? { + Ok(r) => r, + Err(e) => return Some(Err(FormatError::Csv(e))), + }; + + Some((|| { + let source = Self::get_field(&record, self.source_idx)?; + let target = Self::get_field(&record, self.target_idx)?; + let label = Self::get_field(&record, self.label_idx)?; + Ok(Edge { source, target, label }) + })()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_csv(content: &str) -> Csv<&[u8]> { + Csv::from_reader(content.as_bytes()).expect("should construct iterator") + } + + #[test] + fn test_basic_csv_with_header() { + let csv = "source,target,label\nA,B,knows\nB,C,likes\n"; + let edges: Vec<_> = make_csv(csv).collect(); + + assert_eq!(edges.len(), 2); + let e0 = edges[0].as_ref().unwrap(); + assert_eq!(e0.source, "A"); + assert_eq!(e0.target, "B"); + assert_eq!(e0.label, "knows"); + } + + #[test] + fn test_named_columns() { + let csv = "from,to,rel\nAlice,Bob,knows\n"; + let config = CsvConfig { + source_column: ColumnSpec::Name("from".to_string()), + target_column: ColumnSpec::Name("to".to_string()), + label_column: ColumnSpec::Name("rel".to_string()), + ..CsvConfig::default() + }; + let iter = Csv::new(csv.as_bytes(), config).unwrap(); + let edges: Vec<_> = iter.collect(); + + assert_eq!(edges.len(), 1); + let e = edges[0].as_ref().unwrap(); + assert_eq!(e.source, "Alice"); + assert_eq!(e.target, "Bob"); + assert_eq!(e.label, "knows"); + } + + #[test] + fn test_missing_named_column_returns_error() { + let csv = "source,target,label\nA,B,knows\n"; + let config = CsvConfig { + source_column: ColumnSpec::Name("nonexistent".to_string()), + ..CsvConfig::default() + }; + let result = Csv::new(csv.as_bytes(), config); + assert!( + matches!(result, Err(FormatError::MissingColumn { name }) if name == "nonexistent") + ); + } + + #[test] + fn test_custom_delimiter() { + let csv = "source\ttarget\tlabel\nX\tY\tedge\n"; + let config = CsvConfig { + delimiter: b'\t', + ..CsvConfig::default() + }; + let iter = Csv::new(csv.as_bytes(), 
config).unwrap(); + let edges: Vec<_> = iter.collect(); + + assert_eq!(edges.len(), 1); + let e = edges[0].as_ref().unwrap(); + assert_eq!(e.source, "X"); + assert_eq!(e.target, "Y"); + assert_eq!(e.label, "edge"); + } + + #[test] + fn test_no_header_with_index_columns() { + let csv = "A,B,knows\nC,D,likes\n"; + let config = CsvConfig { + has_header: false, + ..CsvConfig::default() + }; + let iter = Csv::new(csv.as_bytes(), config).unwrap(); + let edges: Vec<_> = iter.collect(); + + assert_eq!(edges.len(), 2); + assert_eq!(edges[0].as_ref().unwrap().source, "A"); + assert_eq!(edges[1].as_ref().unwrap().source, "C"); + } + + #[test] + fn test_empty_csv_yields_no_edges() { + let csv = "source,target,label\n"; + let edges: Vec<_> = make_csv(csv).collect(); + assert!(edges.is_empty()); + } + + #[test] + fn test_graph_source_impl() { + use crate::graph::{GraphBuilder, GraphDecomposition, InMemoryBuilder}; + + let csv = "source,target,label\nA,B,knows\nB,C,likes\nC,A,knows\n"; + let iter = Csv::from_reader(csv.as_bytes()).unwrap(); + let graph = InMemoryBuilder::default() + .load(iter) + .unwrap() + .build() + .unwrap(); + assert_eq!(graph.num_nodes(), 3); + } +} diff --git a/src/formats/mod.rs b/src/formats/mod.rs new file mode 100644 index 0000000..4aafb79 --- /dev/null +++ b/src/formats/mod.rs @@ -0,0 +1,36 @@ +//! Data format parsers for pathrex. +//! +//! # Quick-start examples +//! +//! ```no_run +//! use pathrex::graph::{Graph, InMemory, GraphDecomposition}; +//! use pathrex::formats::Csv; +//! use std::fs::File; +//! +//! // Build from CSV in one line +//! let g = Graph::::try_from( +//! Csv::from_reader(File::open("edges.csv").unwrap()).unwrap() +//! ).unwrap(); +//! ``` + +pub mod csv; + +pub use csv::Csv; + +use thiserror::Error; + +/// Unified error type for all format parsing operations. +#[derive(Error, Debug)] +pub enum FormatError { + /// An error produced by the `csv` crate during parsing. 
+ #[error("CSV error: {0}")] + Csv(#[from] ::csv::Error), + + /// A required column was not found in the CSV header row. + #[error("Missing CSV column '{name}'")] + MissingColumn { name: String }, + + /// An I/O error occurred while reading the data source. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), +} diff --git a/src/graph/inmemory.rs b/src/graph/inmemory.rs new file mode 100644 index 0000000..f03fad0 --- /dev/null +++ b/src/graph/inmemory.rs @@ -0,0 +1,314 @@ +use std::{collections::HashMap, io::Read}; +use std::sync::Arc; + +use crate::formats::Csv; +use crate::{graph::GraphSource, lagraph_sys::{ + GrB_BOOL, GrB_Index, GrB_Info, GrB_LOR, GrB_Matrix, GrB_Matrix_build_BOOL, GrB_Matrix_free, GrB_Matrix_new, LAGraph_Graph, LAGraph_Kind, LAGraph_New +}}; + +use super::{ + Backend, Edge, GraphBuilder, GraphDecomposition, GraphError, LagraphGraph, + ensure_grb_init, grb_ok, +}; + +/// Marker type for the in-memory GraphBLAS-backed backend. +/// +/// ```no_run +/// use pathrex::graph::{Graph, InMemory, GraphDecomposition}; +/// use pathrex::formats::csv::Csv; +/// use std::fs::File; +/// +/// let graph = Graph::::try_from( +/// Csv::from_reader(File::open("edges.csv").unwrap()).unwrap() +/// ).unwrap(); +/// println!("Nodes: {}", graph.num_nodes()); +/// ``` +pub struct InMemory; + +impl Backend for InMemory { + type Graph = InMemoryGraph; + type Builder = InMemoryBuilder; +} + +/// Accumulates edges in RAM and compiles them into an [`InMemoryGraph`]. 
+pub struct InMemoryBuilder { + node_to_id: HashMap, + id_to_node: Vec, + label_buffers: HashMap>, + prebuilt: HashMap, +} + +impl InMemoryBuilder { + pub fn new() -> Self { + Self { + node_to_id: HashMap::new(), + id_to_node: Vec::new(), + label_buffers: HashMap::new(), + prebuilt: HashMap::new(), + } + } + + fn insert_node(&mut self, node: &str) -> u64 { + if let Some(&id) = self.node_to_id.get(node) { + return id; + } + let id = self.id_to_node.len() as u64; + self.id_to_node.push(node.to_owned()); + self.node_to_id.insert(node.to_owned(), id); + id + } + + pub fn push_edge(&mut self, edge: Edge) -> Result<(), GraphError> { + let src = self.insert_node(&edge.source); + let tgt = self.insert_node(&edge.target); + self.label_buffers + .entry(edge.label) + .or_default() + .push((src, tgt)); + Ok(()) + } + + pub fn with_stream(&mut self, stream: I) -> Result<(), GraphError> + where + I: IntoIterator>, + GraphError: From, + { + for item in stream { + self.push_edge(item?)?; + } + Ok(()) + } + + /// Accept a pre-built [`GrB_Matrix`] for `label`, wrapping it in an + /// [`LAGraph_Graph`] immediately. 
+ pub fn push_grb_matrix( + &mut self, + label: impl Into, + mut matrix: GrB_Matrix, + ) -> Result<(), GraphError> { + ensure_grb_init()?; + let lg: LagraphGraph = unsafe { + let mut g: LAGraph_Graph = std::ptr::null_mut(); + let mut msg = [0i8; 256]; + let info = LAGraph_New( + &mut g, + &mut matrix, + LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED, + msg.as_mut_ptr(), + ); + if info != GrB_Info::GrB_SUCCESS as i32 { + if !matrix.is_null() { + GrB_Matrix_free(&mut matrix); + } + return Err(GraphError::GraphBlas(info)); + } + LagraphGraph { inner: g } + }; + self.prebuilt.insert(label.into(), lg); + Ok(()) + } +} + +impl Default for InMemoryBuilder { + fn default() -> Self { + Self::new() + } +} + +impl GraphBuilder for InMemoryBuilder { + type Graph = InMemoryGraph; + type Error = GraphError; + + fn build(self) -> Result { + ensure_grb_init()?; + + let n = self.id_to_node.len() as GrB_Index; + + let mut graphs: HashMap> = + HashMap::with_capacity(self.label_buffers.len() + self.prebuilt.len()); + + for (label, lg) in self.prebuilt { + graphs.insert(label, Arc::new(lg)); + } + + for (label, pairs) in &self.label_buffers { + let rows: Vec = pairs.iter().map(|(r, _)| *r).collect(); + let cols: Vec = pairs.iter().map(|(_, c)| *c).collect(); + let vals: Vec = vec![true; pairs.len()]; + let nvals = pairs.len() as GrB_Index; + + let grb_matrix: GrB_Matrix = unsafe { + let mut m: GrB_Matrix = std::ptr::null_mut(); + grb_ok(GrB_Matrix_new(&mut m, GrB_BOOL, n, n) as i32)?; + grb_ok(GrB_Matrix_build_BOOL( + m, + rows.as_ptr(), + cols.as_ptr(), + vals.as_ptr(), + nvals, + GrB_LOR, + ) as i32)?; + m + }; + + let lg: LagraphGraph = unsafe { + let mut g: LAGraph_Graph = std::ptr::null_mut(); + let mut a = grb_matrix; + let mut msg = [0i8; 256]; + let info = LAGraph_New( + &mut g, + &mut a, + LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED, + msg.as_mut_ptr(), + ); + if info != GrB_Info::GrB_SUCCESS as i32 { + if !a.is_null() { + GrB_Matrix_free(&mut a); + } + return 
Err(GraphError::GraphBlas(info)); + } + LagraphGraph { inner: g } + }; + + graphs.insert(label.clone(), Arc::new(lg)); + } + + let node_to_id: HashMap = self + .node_to_id + .into_iter() + .map(|(k, v)| (k, v as usize)) + .collect(); + + Ok(InMemoryGraph { + node_to_id, + id_to_node: self.id_to_node, + graphs, + }) + } +} + +/// Immutable, read-only Boolean-decomposed graph backed by LAGraph graphs. +pub struct InMemoryGraph { + node_to_id: HashMap, + id_to_node: Vec, + graphs: HashMap>, +} + +impl GraphDecomposition for InMemoryGraph { + type Error = GraphError; + + fn get_graph(&self, label: &str) -> Result, GraphError> { + self.graphs + .get(label) + .cloned() + .ok_or_else(|| GraphError::LabelNotFound(label.to_owned())) + } + + fn get_node_id(&self, string_id: &str) -> Option { + self.node_to_id.get(string_id).copied() + } + + fn get_node_name(&self, mapped_id: usize) -> Option { + self.id_to_node.get(mapped_id).cloned() + } + + fn num_nodes(&self) -> usize { + self.id_to_node.len() + } +} + +impl GraphSource for Csv { + fn apply_to(self, mut builder: InMemoryBuilder) -> Result { + for item in self { + builder.push_edge(item?)?; + } + Ok(builder) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph::{GraphBuilder, GraphDecomposition}; + + fn make_graph(edges: &[(&str, &str, &str)]) -> InMemoryGraph { + let mut builder = InMemoryBuilder::new(); + for &(src, tgt, lbl) in edges { + builder + .push_edge(Edge { + source: src.to_owned(), + target: tgt.to_owned(), + label: lbl.to_owned(), + }) + .expect("push_edge should not fail"); + } + builder.build().expect("build should succeed") + } + + #[test] + fn test_node_dictionary_round_trip() { + let graph = make_graph(&[("Alice", "Bob", "knows"), ("Bob", "Charlie", "knows")]); + + assert_eq!(graph.num_nodes(), 3); + + for name in &["Alice", "Bob", "Charlie"] { + let id = graph.get_node_id(name).expect("node should exist"); + assert!(id < 3); + assert_eq!(graph.get_node_name(id).as_deref(), Some(*name)); 
+ } + + assert!(graph.get_node_id("NonExistent").is_none()); + assert!(graph.get_node_name(999).is_none()); + } + + #[test] + fn test_graph_exists_for_each_label() { + let graph = make_graph(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("A", "C", "likes"), + ]); + + assert!(graph.get_graph("knows").is_ok()); + assert!(graph.get_graph("likes").is_ok()); + assert!(matches!( + graph.get_graph("nonexistent"), + Err(GraphError::LabelNotFound(_)) + )); + } + + #[test] + fn test_empty_builder_produces_empty_graph() { + let graph = InMemoryBuilder::new().build().expect("build should succeed"); + assert_eq!(graph.num_nodes(), 0); + assert!(matches!( + graph.get_graph("anything"), + Err(GraphError::LabelNotFound(_)) + )); + } + + #[test] + fn test_self_loop_edge() { + let graph = make_graph(&[("A", "A", "self")]); + assert_eq!(graph.num_nodes(), 1); + assert!(graph.get_graph("self").is_ok()); + } + + #[test] + fn test_with_stream_from_csv() { + use crate::formats::csv::Csv; + + let csv = "source,target,label\nA,B,knows\nB,C,likes\nC,A,knows\n"; + let iter = Csv::from_reader(csv.as_bytes()).unwrap(); + + let graph = InMemoryBuilder::new() + .load(iter) + .expect("load should succeed") + .build() + .expect("build should succeed"); + + assert_eq!(graph.num_nodes(), 3); + assert!(graph.get_graph("knows").is_ok()); + assert!(graph.get_graph("likes").is_ok()); + } +} diff --git a/src/graph/mod.rs b/src/graph/mod.rs new file mode 100644 index 0000000..ced787b --- /dev/null +++ b/src/graph/mod.rs @@ -0,0 +1,241 @@ +//! Core graph abstractions for pathrex. 
+ +pub mod inmemory; + +pub use inmemory::{InMemory, InMemoryBuilder, InMemoryGraph}; + +use std::marker::PhantomData; +use std::sync::{Arc, Once}; + +use crate::lagraph_sys::{ + GrB_BOOL, GrB_Index, GrB_Info, GrB_Vector, GrB_Vector_free, GrB_Vector_new, + LAGraph_Delete, LAGraph_Graph, LAGraph_Init, +}; + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum GraphError { + /// A GraphBLAS / LAGraph C call returned a non-SUCCESS info code. + #[error("GraphBLAS error: info code {0}")] + GraphBlas(i32), + + /// [`GraphDecomposition::get_graph`] was called with an unknown label. + #[error("Label not found: '{0}'")] + LabelNotFound(String), + + /// [`ensure_grb_init`] was called but `LAGraph_Init` returned a failure code. + #[error("LAGraph initialization failed")] + InitFailed, + + /// A format-layer error propagated through [`GraphBuilder::load`]. + #[error("Format error: {0}")] + Format(#[from] crate::formats::FormatError), +} + +static GRB_INIT: Once = Once::new(); + +pub fn ensure_grb_init() -> Result<(), GraphError> { + let mut result = Ok(()); + GRB_INIT.call_once(|| { + let mut msg = [0i8; 256]; + let info = unsafe { LAGraph_Init(msg.as_mut_ptr()) }; + if info != GrB_Info::GrB_SUCCESS as i32 { + result = Err(GraphError::InitFailed); + } + }); + result +} + +#[inline] +pub fn grb_ok(info: i32) -> Result<(), GraphError> { + if info == GrB_Info::GrB_SUCCESS as i32 { + Ok(()) + } else { + Err(GraphError::GraphBlas(info)) + } +} + +pub struct LagraphGraph { + pub(crate) inner: LAGraph_Graph, +} + +impl Drop for LagraphGraph { + fn drop(&mut self) { + if !self.inner.is_null() { + let mut msg = [0i8; 256]; + unsafe { LAGraph_Delete(&mut self.inner, msg.as_mut_ptr()) }; + } + } +} + +unsafe impl Send for LagraphGraph {} +unsafe impl Sync for LagraphGraph {} + +pub struct GraphblasVector { + pub inner: GrB_Vector, +} + +impl GraphblasVector { + /// Allocate a new N-element boolean `GrB_Vector`. 
/// A directed, labelled edge as produced by format parsers.
///
/// Edges compare and hash by value over all three fields, so they can be
/// checked in assertions and deduplicated in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Edge {
    /// Name of the node the edge starts from.
    pub source: String,
    /// Name of the node the edge points to.
    pub target: String,
    /// Label attached to the edge.
    pub label: String,
}
+ fn get_node_id(&self, string_id: &str) -> Option; + + /// Translates a matrix index back to a string ID. + fn get_node_name(&self, mapped_id: usize) -> Option; + fn num_nodes(&self) -> usize; +} + +/// Associates a backend marker type with a concrete [`GraphBuilder`] and +/// [`GraphDecomposition`]. +/// +/// # Example +/// +/// ```no_run +/// use pathrex::graph::{Backend, Graph, InMemory, GraphDecomposition}; +/// use pathrex::formats::Csv; +/// use std::fs::File; +/// +/// let graph = Graph::::try_from( +/// Csv::from_reader(File::open("edges.csv").unwrap()).unwrap() +/// ).unwrap(); +/// println!("Nodes: {}", graph.num_nodes()); +/// ``` +pub trait Backend { + /// The graph type produced by this backend. + type Graph: GraphDecomposition; + /// The builder type for this backend. Must implement `Default` so + /// [`Graph::try_from`] can construct it without arguments. + type Builder: GraphBuilder; +} + + +/// A zero-sized handle parameterised by a [`Backend`] marker type. +/// +/// Use [`Graph::::builder()`] to get a fresh builder, or +/// [`Graph::::try_from(source)`] to build a graph in one call. 
+pub struct Graph { + _marker: PhantomData, +} + +impl Graph { + pub fn builder() -> B::Builder { + B::Builder::default() + } + + pub fn try_from(source: S) -> Result::Error> + where + S: GraphSource, + { + B::Builder::default().load(source)?.build() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::{CountingBuilder, CountOutput, VecSource}; + + fn edges(triples: &[(&str, &str, &str)]) -> Vec { + triples + .iter() + .map(|&(s, t, l)| Edge { + source: s.into(), + target: t.into(), + label: l.into(), + }) + .collect() + } + + #[test] + fn test_load_and_build() { + let source = VecSource(edges(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("A", "C", "likes"), + ])); + let output: CountOutput = + CountingBuilder::default().load(source).unwrap().build().unwrap(); + assert_eq!(output.num_nodes(), 3); + } + + #[test] + fn test_graph_try_from() { + struct TestBackend; + impl Backend for TestBackend { + type Graph = CountOutput; + type Builder = CountingBuilder; + } + + let source = VecSource(edges(&[ + ("X", "Y", "rel"), + ("Y", "Z", "rel"), + ])); + let g = Graph::::try_from(source).unwrap(); + assert_eq!(g.num_nodes(), 2); + } +} diff --git a/src/lagraph_sys.rs b/src/lagraph_sys.rs new file mode 100644 index 0000000..055a0c6 --- /dev/null +++ b/src/lagraph_sys.rs @@ -0,0 +1,10 @@ +//! FFI bindings for SuiteSparse:GraphBLAS and LAGraph. 
+#![allow( + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + dead_code, + clippy::all +)] + +include!("lagraph_sys_generated.rs"); diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs new file mode 100644 index 0000000..9c333eb --- /dev/null +++ b/src/lagraph_sys_generated.rs @@ -0,0 +1,256 @@ +/* automatically generated by rust-bindgen 0.71.1 */ + +pub type __off_t = ::std::os::raw::c_long; +pub type __off64_t = ::std::os::raw::c_long; +pub type FILE = _IO_FILE; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_marker { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_codecvt { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_wide_data { + _unused: [u8; 0], +} +pub type _IO_lock_t = ::std::os::raw::c_void; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _IO_FILE { + pub _flags: ::std::os::raw::c_int, + pub _IO_read_ptr: *mut ::std::os::raw::c_char, + pub _IO_read_end: *mut ::std::os::raw::c_char, + pub _IO_read_base: *mut ::std::os::raw::c_char, + pub _IO_write_base: *mut ::std::os::raw::c_char, + pub _IO_write_ptr: *mut ::std::os::raw::c_char, + pub _IO_write_end: *mut ::std::os::raw::c_char, + pub _IO_buf_base: *mut ::std::os::raw::c_char, + pub _IO_buf_end: *mut ::std::os::raw::c_char, + pub _IO_save_base: *mut ::std::os::raw::c_char, + pub _IO_backup_base: *mut ::std::os::raw::c_char, + pub _IO_save_end: *mut ::std::os::raw::c_char, + pub _markers: *mut _IO_marker, + pub _chain: *mut _IO_FILE, + pub _fileno: ::std::os::raw::c_int, + pub _flags2: ::std::os::raw::c_int, + pub _old_offset: __off_t, + pub _cur_column: ::std::os::raw::c_ushort, + pub _vtable_offset: ::std::os::raw::c_schar, + pub _shortbuf: [::std::os::raw::c_char; 1usize], + pub _lock: *mut _IO_lock_t, + pub _offset: __off64_t, + pub _codecvt: *mut _IO_codecvt, + pub _wide_data: *mut _IO_wide_data, + pub _freeres_list: *mut _IO_FILE, + pub _freeres_buf: *mut 
::std::os::raw::c_void, + pub __pad5: usize, + pub _mode: ::std::os::raw::c_int, + pub _unused2: [::std::os::raw::c_char; 20usize], +} +pub type GrB_Index = u64; +#[repr(i32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum GrB_Info { + GrB_SUCCESS = 0, + GrB_NO_VALUE = 1, + GxB_EXHAUSTED = 7089, + GrB_UNINITIALIZED_OBJECT = -1, + GrB_NULL_POINTER = -2, + GrB_INVALID_VALUE = -3, + GrB_INVALID_INDEX = -4, + GrB_DOMAIN_MISMATCH = -5, + GrB_DIMENSION_MISMATCH = -6, + GrB_OUTPUT_NOT_EMPTY = -7, + GrB_NOT_IMPLEMENTED = -8, + GrB_ALREADY_SET = -9, + GrB_PANIC = -101, + GrB_OUT_OF_MEMORY = -102, + GrB_INSUFFICIENT_SPACE = -103, + GrB_INVALID_OBJECT = -104, + GrB_INDEX_OUT_OF_BOUNDS = -105, + GrB_EMPTY_OBJECT = -106, + GxB_JIT_ERROR = -7001, + GxB_GPU_ERROR = -7002, + GxB_OUTPUT_IS_READONLY = -7003, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_Descriptor_opaque { + _unused: [u8; 0], +} +pub type GrB_Descriptor = *mut GB_Descriptor_opaque; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_Type_opaque { + _unused: [u8; 0], +} +pub type GrB_Type = *mut GB_Type_opaque; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_BinaryOp_opaque { + _unused: [u8; 0], +} +pub type GrB_BinaryOp = *mut GB_BinaryOp_opaque; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_Semiring_opaque { + _unused: [u8; 0], +} +pub type GrB_Semiring = *mut GB_Semiring_opaque; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_Scalar_opaque { + _unused: [u8; 0], +} +pub type GrB_Scalar = *mut GB_Scalar_opaque; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_Vector_opaque { + _unused: [u8; 0], +} +pub type GrB_Vector = *mut GB_Vector_opaque; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GB_Matrix_opaque { + _unused: [u8; 0], +} +pub type GrB_Matrix = *mut GB_Matrix_opaque; +unsafe extern "C" { + pub static mut GrB_BOOL: GrB_Type; +} +unsafe extern "C" { + pub static mut GrB_LOR: GrB_BinaryOp; +} +unsafe extern "C" { + pub static 
mut GrB_LOR_LAND_SEMIRING_BOOL: GrB_Semiring; +} +unsafe extern "C" { + pub fn GrB_Vector_new(v: *mut GrB_Vector, type_: GrB_Type, n: GrB_Index) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Vector_nvals(nvals: *mut GrB_Index, v: GrB_Vector) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Vector_setElement_BOOL(w: GrB_Vector, x: bool, i: GrB_Index) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Vector_extractTuples_BOOL( + I_: *mut GrB_Index, + X: *mut bool, + nvals: *mut GrB_Index, + V: GrB_Vector, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Matrix_new( + A: *mut GrB_Matrix, + type_: GrB_Type, + nrows: GrB_Index, + ncols: GrB_Index, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Matrix_nvals(nvals: *mut GrB_Index, A: GrB_Matrix) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Matrix_build_BOOL( + C: GrB_Matrix, + I_: *const GrB_Index, + J: *const GrB_Index, + X: *const bool, + nvals: GrB_Index, + dup: GrB_BinaryOp, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_vxm( + w: GrB_Vector, + mask: GrB_Vector, + accum: GrB_BinaryOp, + semiring: GrB_Semiring, + u: GrB_Vector, + A: GrB_Matrix, + desc: GrB_Descriptor, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Vector_free(object: *mut GrB_Vector) -> GrB_Info; +} +unsafe extern "C" { + pub fn GrB_Matrix_free(object: *mut GrB_Matrix) -> GrB_Info; +} +#[repr(i32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum LAGraph_Kind { + LAGraph_ADJACENCY_UNDIRECTED = 0, + LAGraph_ADJACENCY_DIRECTED = 1, + LAGraph_KIND_UNKNOWN = -1, +} +#[repr(i32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum LAGraph_Boolean { + LAGraph_FALSE = 0, + LAGraph_TRUE = 1, + LAGraph_BOOLEAN_UNKNOWN = -1, +} +#[repr(i32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum LAGraph_State { + LAGraph_VALUE = 0, + LAGraph_BOUND = 1, + LAGraph_STATE_UNKNOWN = -1, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct LAGraph_Graph_struct { + pub A: GrB_Matrix, + pub kind: 
LAGraph_Kind, + pub AT: GrB_Matrix, + pub out_degree: GrB_Vector, + pub in_degree: GrB_Vector, + pub is_symmetric_structure: LAGraph_Boolean, + pub nself_edges: i64, + pub emin: GrB_Scalar, + pub emin_state: LAGraph_State, + pub emax: GrB_Scalar, + pub emax_state: LAGraph_State, +} +pub type LAGraph_Graph = *mut LAGraph_Graph_struct; +unsafe extern "C" { + pub fn LAGraph_Init(msg: *mut ::std::os::raw::c_char) -> ::std::os::raw::c_int; +} +unsafe extern "C" { + pub fn LAGraph_Finalize(msg: *mut ::std::os::raw::c_char) -> ::std::os::raw::c_int; +} +unsafe extern "C" { + pub fn LAGraph_New( + G: *mut LAGraph_Graph, + A: *mut GrB_Matrix, + kind: LAGraph_Kind, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +unsafe extern "C" { + pub fn LAGraph_Delete( + G: *mut LAGraph_Graph, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +unsafe extern "C" { + pub fn LAGraph_Cached_AT( + G: LAGraph_Graph, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +unsafe extern "C" { + pub fn LAGraph_MMRead( + A: *mut GrB_Matrix, + f: *mut FILE, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..cf4f17d --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,5 @@ +pub mod graph; +pub mod formats; +pub(crate) mod utils; + +pub mod lagraph_sys; diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..df92b97 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,77 @@ +use crate::graph::*; +use std::sync::Arc; + +#[allow(dead_code)] +pub struct CountOutput(pub usize, std::marker::PhantomData); + +impl CountOutput { + #[allow(dead_code)] + pub fn new(count: usize) -> Self { + Self(count, std::marker::PhantomData) + } +} + +impl GraphDecomposition for CountOutput { + type Error = E; + + fn get_graph(&self, _label: &str) -> Result, Self::Error> { + unimplemented!("CountOutput is a test stub") + } + + fn get_node_id(&self, _string_id: &str) -> Option { + 
None + } + + fn get_node_name(&self, _mapped_id: usize) -> Option { + None + } + + fn num_nodes(&self) -> usize { + self.0 + } +} + +/// A minimal [`GraphBuilder`] that counts pushed edges and produces a [`CountOutput`]. +#[allow(dead_code)] +pub struct CountingBuilder( + pub usize, + std::marker::PhantomData, +); + +impl CountingBuilder { + #[allow(dead_code)] + pub fn new() -> Self { + Self(0, std::marker::PhantomData) + } +} + +impl Default for CountingBuilder { + fn default() -> Self { + Self::new() + } +} + +impl GraphBuilder for CountingBuilder { + type Graph = CountOutput; + type Error = E; + + fn build(self) -> Result { + Ok(CountOutput::new(self.0)) + } +} + + +#[allow(dead_code)] +pub struct VecSource(pub Vec); + +impl GraphSource> + for VecSource +{ + fn apply_to( + self, + mut builder: CountingBuilder, + ) -> Result, E> { + builder.0 += self.0.len(); + Ok(builder) + } +}