diff --git a/Cargo.lock b/Cargo.lock index df6a01be..6262f5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,16 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -443,6 +453,8 @@ dependencies = [ "env_logger", "fact-api", "fact-ebpf", + "glob", + "globset", "http-body-util", "hyper", "hyper-tls", @@ -574,18 +586,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - [[package]] name = "getrandom" version = "0.4.1" @@ -607,9 +607,22 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] [[package]] name = "h2" @@ -939,7 +952,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 
0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -1490,7 +1503,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1744,7 +1757,7 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.4.1", + "getrandom", "js-sys", "wasm-bindgen", ] @@ -1776,15 +1789,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -2003,15 +2007,6 @@ dependencies = [ "wit-parser", ] -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - [[package]] name = "wit-bindgen-rust" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 742c25f4..01330200 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ aya = { version = "0.13.1", default-features = false } anyhow = { version = "1", default-features = false, features = ["std", "backtrace"] } clap = { version = "4.5.41", features = ["derive", "env"] } env_logger = { version = "0.11.5", default-features = false, features = ["humantime"] } +glob = "0.3.3" +globset = "0.4.18" http-body-util = "0.1.3" hyper = { version = "1.6.0", default-features = false } 
hyper-tls = "0.6.0" diff --git a/README.md b/README.md index f4a6c81d..20f712a2 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ In order to run these tests as part of the unit test suite y use the following command: ```shell -cargo test --config 'target."cfg(all())".runner="sudo -E" --features=bpf-test +cargo test --config 'target."cfg(all())".runner="sudo -E"' --features=bpf-test ``` ## Create compile_commands.json diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 3b448128..1d9cc6fa 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -46,17 +46,21 @@ int BPF_PROG(trace_file_open, struct file* file) { inode_key_t inode_key = inode_to_key(file->f_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: if (!is_monitored(path)) { goto ignored; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. + inode_to_submit = NULL; break; case MONITORED: break; } - submit_event(&m->file_open, event_type, path->path, &inode_key, true); + submit_event(&m->file_open, event_type, path->path, inode_to_submit, true); return 0; @@ -100,6 +104,7 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { inode_key_t inode_key = inode_to_key(dentry->d_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: @@ -107,6 +112,9 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { m->path_unlink.ignored++; return 0; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. 
+ inode_to_submit = NULL; break; case MONITORED: @@ -117,7 +125,7 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, path->path, - &inode_key, + inode_to_submit, path_hooks_support_bpf_d_path); return 0; @@ -150,6 +158,7 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { inode_key_t inode_key = inode_to_key(path->dentry->d_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: @@ -157,6 +166,9 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { m->path_chmod.ignored++; return 0; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. + inode_to_submit = NULL; break; case MONITORED: @@ -166,7 +178,7 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { umode_t old_mode = BPF_CORE_READ(path, dentry, d_inode, i_mode); submit_mode_event(&m->path_chmod, bound_path->path, - &inode_key, + inode_to_submit, mode, old_mode, path_hooks_support_bpf_d_path); @@ -201,6 +213,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign inode_key_t inode_key = inode_to_key(path->dentry->d_inode); const inode_value_t* inode = inode_get(&inode_key); + inode_key_t* inode_to_submit = &inode_key; switch (inode_is_monitored(inode)) { case NOT_MONITORED: @@ -208,6 +221,9 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign m->path_chown.ignored++; return 0; } + // Matched by path prefix only, not by inode. + // Set inode to NULL so userspace knows to do glob matching. 
+ inode_to_submit = NULL; break; case MONITORED: @@ -220,7 +236,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign submit_ownership_event(&m->path_chown, bound_path->path, - &inode_key, + inode_to_submit, uid, gid, old_uid, diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 655d48d7..17ec3949 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -30,17 +30,22 @@ impl TryFrom<&PathBuf> for path_prefix_t { prefix: value.display().to_string(), }); }; - let len = if filename.len() > LPM_SIZE_MAX as usize { - LPM_SIZE_MAX as usize - } else { - filename.len() - }; + + // Take the start of the path until the first occurrence of a wildcard + // character. This is used as a filter in the kernel in cases where + // the inode has failed to match. The full wildcard string is used + // for further processing in userspace. + // + // unwrap is safe here - if there are no matches, the full string is the + // only item in the iterator + let filename_prefix = filename.split(['*', '?', '[', '{']).next().unwrap(); + let len = filename_prefix.len().min(LPM_SIZE_MAX as usize); unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); memcpy( cfg.path.as_mut_ptr() as *mut _, - filename.as_ptr() as *const _, + filename_prefix.as_ptr() as *const _, len, ); cfg.bit_len = (len * 8) as u32; @@ -63,6 +68,12 @@ impl PartialEq for path_prefix_t { unsafe impl Pod for path_prefix_t {} +impl inode_key_t { + pub fn empty(&self) -> bool { + self.inode == 0 && self.dev == 0 + } +} + impl PartialEq for inode_key_t { fn eq(&self, other: &Self) -> bool { self.inode == other.inode && self.dev == other.dev diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 3b84db24..64218b33 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -10,6 +10,8 @@ anyhow = { workspace = true } aya = { workspace = true } clap = { workspace = true } env_logger = { workspace = true } +glob = { workspace = true } +globset = { workspace = true } http-body-util = { workspace = 
true } hyper = { workspace = true } hyper-tls = { workspace = true } diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index ec9ca57f..e6b3e0e3 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -7,6 +7,7 @@ use aya::{ Btf, Ebpf, }; use checks::Checks; +use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; use log::{error, info}; use tokio::{ @@ -30,6 +31,8 @@ pub struct Bpf { paths: Vec, paths_config: watch::Receiver>, + + paths_globset: GlobSet, } impl Bpf { @@ -61,6 +64,7 @@ impl Bpf { tx, paths, paths_config, + paths_globset: GlobSet::empty(), }; bpf.load_paths()?; @@ -127,11 +131,23 @@ impl Bpf { // Add the new prefixes let mut new_paths = Vec::with_capacity(paths_config.len()); + let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + + // Report an invalid pattern as an error, like the other failures in + // this loop, instead of panicking via unwrap(). + let glob = Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str))?; + builder.add(glob); + let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); } + self.paths_globset = builder.build()?; // Remove old prefixes for p in self.paths.iter().filter(|p| !new_paths.contains(p)) { @@ -193,7 +209,21 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { - Ok(event) => event, + Ok(event) => { + // With wildcards, the kernel can only match on the inode and + // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + // the kernel matches up to /etc/) + // + // The kernel sets inode to 0 when it matched via path prefix only. 
+ // so we only need to perform a glob match against the filename + if !event.get_inode().empty() || + self.paths_globset.is_match(event.get_filename()) { + event + } else { + event_counter.dropped(); + continue; + } + }, Err(e) => { error!("Failed to parse event: '{e}'"); event_counter.dropped(); @@ -253,7 +283,7 @@ mod bpf_tests { let monitored_path = env!("CARGO_MANIFEST_DIR"); let monitored_path = PathBuf::from(monitored_path); - let paths = vec![monitored_path.clone()]; + let paths = vec![PathBuf::from(format!("{}/**/*", monitored_path.display()))]; let mut config = FactConfig::default(); config.set_paths(paths); let reloader = Reloader::from(config); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 463fb2c7..ee53676e 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -123,6 +123,26 @@ impl Event { } } + pub fn get_filename(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.filename, + FileData::Creation(data) => &data.filename, + FileData::Unlink(data) => &data.filename, + FileData::Chmod(data) => &data.inner.filename, + FileData::Chown(data) => &data.inner.filename, + } + } + + pub fn get_host_path(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.host_file, + FileData::Creation(data) => &data.host_file, + FileData::Unlink(data) => &data.host_file, + FileData::Chmod(data) => &data.inner.host_file, + FileData::Chown(data) => &data.inner.host_file, + } + } + pub fn set_host_path(&mut self, host_path: PathBuf) { match &mut self.file { FileData::Open(data) => data.host_file = host_path, diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index ac2b5bae..faa28757 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -25,7 +25,7 @@ use std::{ sync::Arc, }; -use anyhow::Context; +use anyhow::{Context, bail}; use aya::maps::MapData; use fact_ebpf::{inode_key_t, inode_value_t}; use log::{debug, info, warn}; @@ -75,8 +75,8 @@ impl HostScanner { fn scan(&self) 
-> anyhow::Result<()> { debug!("Host scan started"); - for path in self.config.borrow().iter() { - let path = host_info::prepend_host_mount(path); + for pattern in self.config.borrow().iter() { + let path = host_info::prepend_host_mount(pattern); self.scan_inner(&path)?; } debug!("Host scan done"); @@ -85,15 +85,21 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - if path.is_dir() { - for entry in path.read_dir()?.flatten() { - let entry = entry.path(); - self.scan_inner(&entry) - .with_context(|| format!("Failed to scan {}", entry.display()))?; + let Some(glob_str) = path.to_str() else { + bail!("invalid path {}", path.display()); + }; + + for entry in glob::glob(glob_str)? { + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } + } + Err(e) => return Err(e.into()), } - } else if path.is_file() { - self.update_entry(path) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; } Ok(()) } diff --git a/tests/conftest.py b/tests/conftest.py index aee04534..5a410487 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -141,7 +141,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir, '/mounted', '/container-dir'], + 'paths': [f'{monitored_dir}/**/*', '/mounted/**/*', '/container-dir/**/*'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index 9a875228..4afeac8f 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -136,7 +136,7 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [ignored_dir] + config['paths'] = [f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this 
point, the event in the ignored directory should show up @@ -173,7 +173,7 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [monitored_dir, ignored_dir] + config['paths'] = [f'{monitored_dir}/**/*', f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py new file mode 100644 index 00000000..fd1728f3 --- /dev/null +++ b/tests/test_wildcard.py @@ -0,0 +1,128 @@ +from time import sleep +import os + +import pytest +import yaml + +from event import Event, EventType, Process + + +@pytest.fixture +def wildcard_config(fact, fact_config, monitored_dir): + config, config_file = fact_config + config['paths'] = [ + f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/*.conf', + f'{monitored_dir}/**/test-*.log', + ] + with open(config_file, 'w') as f: + yaml.dump(config, f) + + # reload the config + fact.kill('SIGHUP') + sleep(0.1) + return config, config_file + + +def test_extension_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + # Should not match any pattern + log_file = os.path.join(monitored_dir, 'app.log') + with open(log_file, 'w') as f: + f.write('This should be ignored') + + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path='') + + server.wait_events([e]) + + +def test_prefix_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + # Wrong prefix - should not match + app_log = os.path.join(monitored_dir, 'app-test.log') + with open(app_log, 'w') as f: + f.write('This should be ignored') + + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This 
should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=test_log, host_path='') + + server.wait_events([e]) + + +def test_recursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + nested_dir = os.path.join(monitored_dir, 'level1', 'level2') + os.makedirs(nested_dir, exist_ok=True) + + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + + root_txt = os.path.join(monitored_dir, 'root.txt') + with open(root_txt, 'w') as f: + f.write('Root level txt') + + nested_txt = os.path.join(nested_dir, 'nested.txt') + with open(nested_txt, 'w') as f: + f.write('Nested txt') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=root_txt, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=nested_txt, host_path=''), + ] + + server.wait_events(events) + + +def test_nonrecursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + fut = os.path.join(monitored_dir, 'app.conf') + with open(fut, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') + + server.wait_events([e]) + + +def test_multiple_patterns(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + # Matches no pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + + txt_file = os.path.join(monitored_dir, 'notes.txt') + with open(txt_file, 'w') as f: + f.write('Text file') + + log_file = os.path.join(monitored_dir, 'test-output.log') + with open(log_file, 'w') as f: + f.write('Log file') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=log_file, host_path=''), + ] + + 
server.wait_events(events)