From 6c94e6b2830acd6eaf76301b305a6b9bd4295db4 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 14:09:19 +0000 Subject: [PATCH 1/8] Moves glob/wildcard matching into Fact. Host scanning now uses globs to only get inodes for the specific files matching the globs. Prefix map is populated with the longest prefix for each glob e.g. /etc/**/*.conf -> /etc/ /home/user/.ssh/id_{rsa,dsa} -> /home/user/.ssh/id_ Kernel captures events based on inode first and then prefix match (this behavior is unchanged) and then userspace does a glob match on the path and host_path. --- Cargo.lock | 65 +++++++++++++++++++--------------------- Cargo.toml | 2 ++ fact-ebpf/src/lib.rs | 21 +++++++++++-- fact/Cargo.toml | 2 ++ fact/src/bpf/mod.rs | 24 ++++++++++++++- fact/src/event/mod.rs | 20 +++++++++++++ fact/src/host_scanner.rs | 25 ++++++++-------- 7 files changed, 108 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index df6a01be..6262f5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,16 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -443,6 +453,8 @@ dependencies = [ "env_logger", "fact-api", "fact-ebpf", + "glob", + "globset", "http-body-util", "hyper", "hyper-tls", @@ -574,18 +586,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - [[package]] name = "getrandom" version = "0.4.1" 
@@ -607,9 +607,22 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] [[package]] name = "h2" @@ -939,7 +952,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -1490,7 +1503,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1744,7 +1757,7 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.4.1", + "getrandom", "js-sys", "wasm-bindgen", ] @@ -1776,15 +1789,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "wasip2" version = 
"1.0.2+wasi-0.2.9" @@ -2003,15 +2007,6 @@ dependencies = [ "wit-parser", ] -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - [[package]] name = "wit-bindgen-rust" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 742c25f4..01330200 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ aya = { version = "0.13.1", default-features = false } anyhow = { version = "1", default-features = false, features = ["std", "backtrace"] } clap = { version = "4.5.41", features = ["derive", "env"] } env_logger = { version = "0.11.5", default-features = false, features = ["humantime"] } +glob = "0.3.3" +globset = "0.4.18" http-body-util = "0.1.3" hyper = { version = "1.6.0", default-features = false } hyper-tls = "0.6.0" diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 655d48d7..29f6dac2 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -30,17 +30,32 @@ impl TryFrom<&PathBuf> for path_prefix_t { prefix: value.display().to_string(), }); }; - let len = if filename.len() > LPM_SIZE_MAX as usize { + + // Take the start of the path until the first occurrence of a wildcard + // character. This is used as a filter in the kernel in cases where + // the inode has failed to match. The full wildcard string is used + // for further processing in userspace. 
+ let filename_prefix = if let Some(wildcard_idx) = filename.chars().position(|c| { + "*?[]{}".contains(c) + }) { + &filename[..wildcard_idx] + } else { + // if there are no wildcards then the whole path can be + // the prefix + filename + }; + + let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { LPM_SIZE_MAX as usize } else { - filename.len() + filename_prefix.len() }; unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); memcpy( cfg.path.as_mut_ptr() as *mut _, - filename.as_ptr() as *const _, + filename_prefix.as_ptr() as *const _, len, ); cfg.bit_len = (len * 8) as u32; diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 3b84db24..64218b33 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -10,6 +10,8 @@ anyhow = { workspace = true } aya = { workspace = true } clap = { workspace = true } env_logger = { workspace = true } +glob = { workspace = true } +globset = { workspace = true } http-body-util = { workspace = true } hyper = { workspace = true } hyper-tls = { workspace = true } diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index ec9ca57f..8175707e 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -14,6 +14,7 @@ use tokio::{ sync::{mpsc, watch}, task::JoinHandle, }; +use globset::{GlobSet, GlobSetBuilder, Glob}; use crate::{event::Event, host_info, metrics::EventCounter}; @@ -30,6 +31,8 @@ pub struct Bpf { paths: Vec, paths_config: watch::Receiver>, + + paths_globset: GlobSet, } impl Bpf { @@ -61,6 +64,7 @@ impl Bpf { tx, paths, paths_config, + paths_globset: GlobSet::empty(), }; bpf.load_paths()?; @@ -127,11 +131,14 @@ impl Bpf { // Add the new prefixes let mut new_paths = Vec::with_capacity(paths_config.len()); + let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + builder.add(Glob::new(&p.to_string_lossy())?); let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); } + self.paths_globset = builder.build()?; // Remove old prefixes for p in 
self.paths.iter().filter(|p| !new_paths.contains(p)) { @@ -193,7 +200,22 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { - Ok(event) => event, + Ok(event) => { + // With wildcards, the kernel can only match on the inode and + // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + // the kernel matches up to /etc/) + // + // We do a proper glob match here to do a final check + // using short circuiting to avoid calling is_match in all + // scenarios + if self.paths_globset.is_match(event.get_filename()) || + self.paths_globset.is_match(event.get_host_path()) { + event + } else { + event_counter.dropped(); + continue; + } + }, Err(e) => { error!("Failed to parse event: '{e}'"); event_counter.dropped(); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 463fb2c7..ee53676e 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -123,6 +123,26 @@ impl Event { } } + pub fn get_filename(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.filename, + FileData::Creation(data) => &data.filename, + FileData::Unlink(data) => &data.filename, + FileData::Chmod(data) => &data.inner.filename, + FileData::Chown(data) => &data.inner.filename, + } + } + + pub fn get_host_path(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.host_file, + FileData::Creation(data) => &data.host_file, + FileData::Unlink(data) => &data.host_file, + FileData::Chmod(data) => &data.inner.host_file, + FileData::Chown(data) => &data.inner.host_file, + } + } + pub fn set_host_path(&mut self, host_path: PathBuf) { match &mut self.file { FileData::Open(data) => data.host_file = host_path, diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index ac2b5bae..78a9a93c 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -75,8 +75,8 @@ impl HostScanner { fn scan(&self) -> anyhow::Result<()> 
{ debug!("Host scan started"); - for path in self.config.borrow().iter() { - let path = host_info::prepend_host_mount(path); + for pattern in self.config.borrow().iter() { + let path = host_info::prepend_host_mount(pattern); self.scan_inner(&path)?; } debug!("Host scan done"); @@ -85,17 +85,18 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - if path.is_dir() { - for entry in path.read_dir()?.flatten() { - let entry = entry.path(); - self.scan_inner(&entry) - .with_context(|| format!("Failed to scan {}", entry.display()))?; + glob::glob(&path.to_string_lossy())?.try_for_each(|entry| { + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()) + .with_context(|| format!("Failed to update entry for {}", path.display()))?; + } + Ok(()) + }, + Err(e) => Err(e.into()) } - } else if path.is_file() { - self.update_entry(path) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; - } - Ok(()) + }) } fn update_entry(&self, path: &Path) -> anyhow::Result<()> { From 1c6a8e475792dd57c50e3db6934e32c506a8f559 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 14:20:30 +0000 Subject: [PATCH 2/8] Fmt --- fact-ebpf/src/lib.rs | 17 ++++++++--------- fact/src/bpf/mod.rs | 2 +- fact/src/host_scanner.rs | 19 +++++++++---------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 29f6dac2..fb1e466c 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -35,15 +35,14 @@ impl TryFrom<&PathBuf> for path_prefix_t { // character. This is used as a filter in the kernel in cases where // the inode has failed to match. The full wildcard string is used // for further processing in userspace. 
- let filename_prefix = if let Some(wildcard_idx) = filename.chars().position(|c| { - "*?[]{}".contains(c) - }) { - &filename[..wildcard_idx] - } else { - // if there are no wildcards then the whole path can be - // the prefix - filename - }; + let filename_prefix = + if let Some(wildcard_idx) = filename.chars().position(|c| "*?[]{}".contains(c)) { + &filename[..wildcard_idx] + } else { + // if there are no wildcards then the whole path can be + // the prefix + filename + }; let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { LPM_SIZE_MAX as usize diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 8175707e..cf4b89a6 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -7,6 +7,7 @@ use aya::{ Btf, Ebpf, }; use checks::Checks; +use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; use log::{error, info}; use tokio::{ @@ -14,7 +15,6 @@ use tokio::{ sync::{mpsc, watch}, task::JoinHandle, }; -use globset::{GlobSet, GlobSetBuilder, Glob}; use crate::{event::Event, host_info, metrics::EventCounter}; diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 78a9a93c..0db63aa4 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -85,17 +85,16 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - glob::glob(&path.to_string_lossy())?.try_for_each(|entry| { - match entry { - Ok(path) => { - if path.is_file() { - self.update_entry(path.as_path()) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; - } - Ok(()) - }, - Err(e) => Err(e.into()) + glob::glob(&path.to_string_lossy())?.try_for_each(|entry| match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } + Ok(()) } + Err(e) => Err(e.into()), }) } From 7946408ae88a26dc8f6deef39e4c3aed65f43e81 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 15:26:45 +0000 Subject: 
[PATCH 3/8] PR review fixes --- fact-ebpf/src/lib.rs | 25 +++++++++++-------------- fact/src/bpf/mod.rs | 10 +++++++--- fact/src/host_scanner.rs | 20 +++++++++++--------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index fb1e466c..17ec3949 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -35,20 +35,11 @@ impl TryFrom<&PathBuf> for path_prefix_t { // character. This is used as a filter in the kernel in cases where // the inode has failed to match. The full wildcard string is used // for further processing in userspace. - let filename_prefix = - if let Some(wildcard_idx) = filename.chars().position(|c| "*?[]{}".contains(c)) { - &filename[..wildcard_idx] - } else { - // if there are no wildcards then the whole path can be - // the prefix - filename - }; - - let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { - LPM_SIZE_MAX as usize - } else { - filename_prefix.len() - }; + // + // unwrap is safe here - if there are no matches, the full string is the + // only item in the iterator + let filename_prefix = filename.split(['*', '?', '[', '{']).next().unwrap(); + let len = filename_prefix.len().min(LPM_SIZE_MAX as usize); unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); @@ -77,6 +68,12 @@ impl PartialEq for path_prefix_t { unsafe impl Pod for path_prefix_t {} +impl inode_key_t { + pub fn empty(&self) -> bool { + self.inode == 0 && self.dev == 0 + } +} + impl PartialEq for inode_key_t { fn eq(&self, other: &Self) -> bool { self.inode == other.inode && self.dev == other.dev diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index cf4b89a6..dd52f9c9 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -133,7 +133,11 @@ impl Bpf { let mut new_paths = Vec::with_capacity(paths_config.len()); let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { - builder.add(Glob::new(&p.to_string_lossy())?); + builder.add( + Glob::new(&p.to_string_lossy()) + 
.with_context(|| format!("invalid glob {}", p.display())) + .unwrap(), + ); let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); @@ -208,8 +212,8 @@ impl Bpf { // We do a proper glob match here to do a final check // using short circuiting to avoid calling is_match in all // scenarios - if self.paths_globset.is_match(event.get_filename()) || - self.paths_globset.is_match(event.get_host_path()) { + if !event.get_inode().empty() || + self.paths_globset.is_match(event.get_filename()) { event } else { event_counter.dropped(); diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 0db63aa4..b6cbeb58 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -85,17 +85,19 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - glob::glob(&path.to_string_lossy())?.try_for_each(|entry| match entry { - Ok(path) => { - if path.is_file() { - self.update_entry(path.as_path()).with_context(|| { - format!("Failed to update entry for {}", path.display()) - })?; + for entry in glob::glob(&path.to_string_lossy())? 
{ + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } } - Ok(()) + Err(e) => return Err(e.into()), } - Err(e) => Err(e.into()), - }) + } + Ok(()) } fn update_entry(&self, path: &Path) -> anyhow::Result<()> { From c65e6a557bec74127b3e94fe46b035f11ae718ce Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 12:39:09 +0000 Subject: [PATCH 4/8] Fix matching/tests and add wildcard tests --- fact-ebpf/src/bpf/main.c | 24 ++++++-- fact/src/bpf/mod.rs | 5 +- tests/conftest.py | 2 +- tests/test_config_hotreload.py | 4 +- tests/test_wildcard.py | 109 +++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 10 deletions(-) create mode 100644 tests/test_wildcard.py diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 3b448128..1d9cc6fa 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -46,17 +46,21 @@ int BPF_PROG(trace_file_open, struct file* file) { inode_key_t inode_key = inode_to_key(file->f_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: if (!is_monitored(path)) { goto ignored; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. 
+ inode_to_submit = NULL; break; case MONITORED: break; } - submit_event(&m->file_open, event_type, path->path, &inode_key, true); + submit_event(&m->file_open, event_type, path->path, inode_to_submit, true); return 0; @@ -100,6 +104,7 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { inode_key_t inode_key = inode_to_key(dentry->d_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: @@ -107,6 +112,9 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { m->path_unlink.ignored++; return 0; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. + inode_to_submit = NULL; break; case MONITORED: @@ -117,7 +125,7 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, path->path, - &inode_key, + inode_to_submit, path_hooks_support_bpf_d_path); return 0; @@ -150,6 +158,7 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { inode_key_t inode_key = inode_to_key(path->dentry->d_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: @@ -157,6 +166,9 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { m->path_chmod.ignored++; return 0; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. 
+ inode_to_submit = NULL; break; case MONITORED: @@ -166,7 +178,7 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { umode_t old_mode = BPF_CORE_READ(path, dentry, d_inode, i_mode); submit_mode_event(&m->path_chmod, bound_path->path, - &inode_key, + inode_to_submit, mode, old_mode, path_hooks_support_bpf_d_path); @@ -201,6 +213,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign inode_key_t inode_key = inode_to_key(path->dentry->d_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: @@ -208,6 +221,9 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign m->path_chown.ignored++; return 0; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. + inode_to_submit = NULL; break; case MONITORED: @@ -220,7 +236,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign submit_ownership_event(&m->path_chown, bound_path->path, - &inode_key, + inode_to_submit, uid, gid, old_uid, diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index dd52f9c9..c25de104 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -209,9 +209,8 @@ impl Bpf { // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, // the kernel matches up to /etc/) // - // We do a proper glob match here to do a final check - // using short circuiting to avoid calling is_match in all - // scenarios + // The kernel sets inode to 0 when it matched via path prefix only. 
+ // so we only need to perform a glob match against the filename if !event.get_inode().empty() || self.paths_globset.is_match(event.get_filename()) { event diff --git a/tests/conftest.py b/tests/conftest.py index aee04534..5a410487 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -141,7 +141,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir, '/mounted', '/container-dir'], + 'paths': [f'{monitored_dir}/**/*', '/mounted/**/*', '/container-dir/**/*'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index 9a875228..4afeac8f 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -136,7 +136,7 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [ignored_dir] + config['paths'] = [f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up @@ -173,7 +173,7 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [monitored_dir, ignored_dir] + config['paths'] = [f'{monitored_dir}/**/*', f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py new file mode 100644 index 00000000..fb815ee4 --- /dev/null +++ b/tests/test_wildcard.py @@ -0,0 +1,109 @@ +import os + +import pytest +import yaml + +from event import Event, EventType, Process + + +@pytest.fixture +def wildcard_config(fact_config, monitored_dir): + config, config_file = fact_config + config['paths'] = [ + f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/**/test-*.log', + ] + with 
open(config_file, 'w') as f: + yaml.dump(config, f) + return config, config_file + + +def test_extension_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + + # Should not match any pattern + log_file = os.path.join(monitored_dir, 'app.log') + with open(log_file, 'w') as f: + f.write('This should be ignored') + + e = Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path='') + + server.wait_events([e]) + + +def test_prefix_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + + # Wrong prefix - should not match + app_log = os.path.join(monitored_dir, 'app-test.log') + with open(app_log, 'w') as f: + f.write('This should be ignored') + + e = Event(process=process, event_type=EventType.CREATION, + file=test_log, host_path='') + + server.wait_events([e]) + + +def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + nested_dir = os.path.join(monitored_dir, 'level1', 'level2') + os.makedirs(nested_dir, exist_ok=True) + + root_txt = os.path.join(monitored_dir, 'root.txt') + with open(root_txt, 'w') as f: + f.write('Root level txt') + + nested_txt = os.path.join(nested_dir, 'nested.txt') + with open(nested_txt, 'w') as f: + f.write('Nested txt') + + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=root_txt, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=nested_txt, host_path=''), + ] + + server.wait_events(events) + + +def test_multiple_patterns(fact, 
wildcard_config, monitored_dir, server): + process = Process.from_proc() + + txt_file = os.path.join(monitored_dir, 'notes.txt') + with open(txt_file, 'w') as f: + f.write('Text file') + + log_file = os.path.join(monitored_dir, 'test-output.log') + with open(log_file, 'w') as f: + f.write('Log file') + + # Matches neither pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=log_file, host_path=''), + ] + + server.wait_events(events) From cf3a711fa42edbf317f34ede11ec309afe60a62d Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 14:20:08 +0000 Subject: [PATCH 5/8] Fix basic unit test --- fact/src/bpf/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index c25de104..9a00a7bf 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -278,7 +278,7 @@ mod bpf_tests { let monitored_path = env!("CARGO_MANIFEST_DIR"); let monitored_path = PathBuf::from(monitored_path); - let paths = vec![monitored_path.clone()]; + let paths = vec![PathBuf::from(format!("{}/**/*", monitored_path.display()))]; let mut config = FactConfig::default(); config.set_paths(paths); let reloader = Reloader::from(config); From d960ecad7eb3f2948258737e166d37723940d423 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 14:20:38 +0000 Subject: [PATCH 6/8] Fix missing single quote in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f4a6c81d..20f712a2 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ In order to run these tests as part of the unit test suite y use the following command: ```shell -cargo test --config 'target."cfg(all())".runner="sudo -E" --features=bpf-test +cargo test --config 
'target."cfg(all())".runner="sudo -E"' --features=bpf-test ``` ## Create compile_commands.json From a31631379a4ae7ba2a73ff26485ecdc1076f2dec Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Wed, 25 Feb 2026 16:22:35 +0000 Subject: [PATCH 7/8] Fix tests based on PR comments --- tests/test_wildcard.py | 65 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py index fb815ee4..fd1728f3 100644 --- a/tests/test_wildcard.py +++ b/tests/test_wildcard.py @@ -1,3 +1,4 @@ +from time import sleep import os import pytest @@ -7,59 +8,69 @@ @pytest.fixture -def wildcard_config(fact_config, monitored_dir): +def wildcard_config(fact, fact_config, monitored_dir): config, config_file = fact_config config['paths'] = [ f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/*.conf', f'{monitored_dir}/**/test-*.log', ] with open(config_file, 'w') as f: yaml.dump(config, f) + + # reload the config + fact.kill('SIGHUP') + sleep(0.1) return config, config_file -def test_extension_wildcard(fact, wildcard_config, monitored_dir, server): +def test_extension_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() - txt_file = os.path.join(monitored_dir, 'document.txt') - with open(txt_file, 'w') as f: - f.write('This should be captured') - # Should not match any pattern log_file = os.path.join(monitored_dir, 'app.log') with open(log_file, 'w') as f: f.write('This should be ignored') + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + e = Event(process=process, event_type=EventType.CREATION, file=txt_file, host_path='') server.wait_events([e]) -def test_prefix_wildcard(fact, wildcard_config, monitored_dir, server): +def test_prefix_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() - test_log = os.path.join(monitored_dir, 'test-app.log') - with open(test_log, 'w') as f: - 
f.write('This should be captured') - # Wrong prefix - should not match app_log = os.path.join(monitored_dir, 'app-test.log') with open(app_log, 'w') as f: f.write('This should be ignored') + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + e = Event(process=process, event_type=EventType.CREATION, file=test_log, host_path='') server.wait_events([e]) -def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): +def test_recursive_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() nested_dir = os.path.join(monitored_dir, 'level1', 'level2') os.makedirs(nested_dir, exist_ok=True) + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + root_txt = os.path.join(monitored_dir, 'root.txt') with open(root_txt, 'w') as f: f.write('Root level txt') @@ -68,11 +79,6 @@ def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): with open(nested_txt, 'w') as f: f.write('Nested txt') - # Different extension - should not match - nested_md = os.path.join(nested_dir, 'readme.md') - with open(nested_md, 'w') as f: - f.write('Should be ignored') - events = [ Event(process=process, event_type=EventType.CREATION, file=root_txt, host_path=''), @@ -83,9 +89,27 @@ def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): server.wait_events(events) -def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): +def test_nonrecursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + fut = os.path.join(monitored_dir, 'app.conf') + with open(fut, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') + + server.wait_events([e]) + + +def test_multiple_patterns(wildcard_config, monitored_dir, server): process = 
Process.from_proc() + # Matches no pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + txt_file = os.path.join(monitored_dir, 'notes.txt') with open(txt_file, 'w') as f: f.write('Text file') @@ -94,11 +118,6 @@ def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): with open(log_file, 'w') as f: f.write('Log file') - # Matches neither pattern - conf_file = os.path.join(monitored_dir, 'config.yml') - with open(conf_file, 'w') as f: - f.write('Config file') - events = [ Event(process=process, event_type=EventType.CREATION, file=txt_file, host_path=''), From caaf860e3393044640aaf17c859312c40198be39 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Wed, 25 Feb 2026 16:40:21 +0000 Subject: [PATCH 8/8] Use to_str instead of lossy --- fact/src/bpf/mod.rs | 9 +++++++-- fact/src/host_scanner.rs | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 9a00a7bf..e6b3e0e3 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -133,11 +133,16 @@ impl Bpf { let mut new_paths = Vec::with_capacity(paths_config.len()); let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + builder.add( - Glob::new(&p.to_string_lossy()) - .with_context(|| format!("invalid glob {}", p.display())) + Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str)) .unwrap(), ); + let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index b6cbeb58..faa28757 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -25,7 +25,7 @@ use std::{ sync::Arc, }; -use anyhow::Context; +use anyhow::{Context, bail}; use aya::maps::MapData; use fact_ebpf::{inode_key_t, inode_value_t}; 
use log::{debug, info, warn}; @@ -85,7 +85,11 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - for entry in glob::glob(&path.to_string_lossy())? { + let Some(glob_str) = path.to_str() else { + bail!("invalid path {}", path.display()); + }; + + for entry in glob::glob(glob_str)? { match entry { Ok(path) => { if path.is_file() {