Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions CaloCluster/data/calo_cluster_net_v2_stage1.norm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"schema_version": 1,
"node_features": [
"log_e",
"t",
"x",
"y",
"r",
"e_rel"
],
"edge_features": [
"dx",
"dy",
"d",
"dt",
"dlog_e",
"asym_e",
"logsum_e",
"dr"
],
"node_mean": [
2.4069125652313232,
834.6543579101562,
-23.62708282470703,
70.98532104492188,
455.12542724609375,
0.38432541489601135
],
"node_std": [
0.7527871131896973,
390.6362609863281,
325.2221374511719,
315.6965026855469,
62.38364028930664,
0.29344040155410767
],
"edge_mean": [
0.0,
0.0,
95.80323791503906,
0.0,
0.0,
0.0,
3.102428674697876,
0.0
],
"edge_std": [
108.05096435546875,
107.51701354980469,
118.56077575683594,
4.873984336853027,
1.2146189212799072,
0.5318130850791931,
0.6094895005226135,
47.51533889770508
],
"node_count": 348548,
"edge_count": 831668
}
Binary file added CaloCluster/data/calo_cluster_net_v2_stage1.onnx
Binary file not shown.
Binary file added CaloCluster/data/simple_edge_net_v2.onnx
Binary file not shown.
51 changes: 51 additions & 0 deletions CaloCluster/fcl/from_mcs-gnn-prod.fcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Production-style FCL that runs GNN clustering (new) on MCS art-format
# input and writes the result next to the BFS clustering (existing).
# This job schedules only the GNN producers; the BFS
# CaloClusterCollection is not rerun here and reaches the output via
# "keep *". Demonstrates the `CaloClusterGNN` prolog pattern from
# Offline/CaloCluster/fcl/prolog.fcl -- a full production-reco FCL
# includes both the BFS sequence and the GNN sequence to emit two
# CaloClusterCollections side by side.
#
# Usage (working_dir, with u092):
# mu2e -c Offline/CaloCluster/fcl/from_mcs-gnn-prod.fcl \
# -s <mcs.art> -o mcs.gnn.art -n 100
#
# The output art file carries:
# * caloClusterMaker :: CaloClusterCollection ("") -- BFS, untouched
# * caloClusterMakerGNN :: CaloClusterCollection ("GNN") -- GNN
#
# Task 16h (production FHiCL wiring).

#include "Offline/fcl/minimalMessageService.fcl"
#include "Offline/fcl/standardServices.fcl"
#include "Offline/CaloCluster/fcl/prolog.fcl"

# Process name for this job.
process_name : GnnProd

# Events are read from an art/ROOT file; the Usage line above supplies
# the file name on the command line (-s).
source : { module_type : RootInput }

# Service configuration comes from the Services.Reco prolog table
# (presumably defined in standardServices.fcl, included above -- confirm).
services : @local::Services.Reco

# Module declarations and path scheduling for the GNN-only reco job.
physics : {
# Only the two GNN producers are declared; both configurations are taken
# unchanged from the CaloClusterGNN table in CaloCluster/fcl/prolog.fcl.
producers : {
caloHitGraphMakerGNN : @local::CaloClusterGNN.caloHitGraphMakerGNN
caloClusterMakerGNN : @local::CaloClusterGNN.caloClusterMakerGNN
}

# CaloClusterGNN.Reco is [ caloHitGraphMakerGNN, caloClusterMakerGNN ];
# the labels in that sequence must match the producer labels above.
GnnPath : [ @sequence::CaloClusterGNN.Reco ]
# `out` is the RootOutput module defined in the outputs table below.
OutPath : [ out ]

trigger_paths : [ GnnPath ]
end_paths : [ OutPath ]
}

# Output stream definitions. `out` is the label referenced by
# physics.OutPath.
outputs : {
out : {
module_type : RootOutput
# Default output file name for the art event stream.
fileName : "mcs.gnn.art"
# CaloHitGraphCollection is a transient data product (no ROOT
# dictionary by design -- see Offline/RecoDataProducts/inc/CaloHitGraph.hh
# and offline_integration.md 2.2). Drop it from the output art file.
# Everything else, including products already present in the input
# file, is kept.
outputCommands : [ "keep *",
"drop *_caloHitGraphMakerGNN_*_*" ]
}
}
72 changes: 72 additions & 0 deletions CaloCluster/fcl/from_mcs-gnn-test.fcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Smoke + parity test for the GNN clustering split design.
#
# Reads MCS art files (which carry CaloHitCollection produced by
# CaloHitMaker in the Reconstruct process), runs the two new
# EDProducers, and dumps the GNN cluster assignments per
# event-disk to a flat TTree for byte-comparison against the
# Python pipeline.
#
# Usage (build node, with u092):
# mu2e -c Offline/CaloCluster/fcl/from_mcs-gnn-test.fcl \
# <slim.art> -T parity_dump.root -n 100
#
# Companion Python script:
# calorimeter/GNN/scripts/compare_parity_dump.py
#
# Task 16g, Stage 3.

#include "Offline/fcl/minimalMessageService.fcl"
#include "Offline/fcl/standardServices.fcl"

# Process name for this job.
process_name : GnnTest

# Events are read from an art/ROOT file; the Usage line above supplies
# the file name on the command line.
source : { module_type : RootInput }

# Service configuration comes from the Services.Reco prolog table
# (presumably defined in standardServices.fcl, included above -- confirm).
services : @local::Services.Reco

# Module declarations and path scheduling for the parity test.
#
# NOTE: this file does not include CaloCluster/fcl/prolog.fcl; the two
# producer configurations below are spelled out by hand and currently
# carry the same values as the CaloClusterGNN prolog table. Keep them in
# sync when the frozen recipe changes.
physics : {

producers : {

# Graph producer: reads the CaloHitCollection and emits the
# CaloHitGraphCollection consumed by caloClusterMakerGNN.
caloHitGraphMakerGNN : {
module_type : "CaloHitGraphMaker"
caloHitCollection : "CaloHitMaker"
# JSON sidecar holding the per-feature mean/std used for z-score
# normalisation.
normSidecar : "Offline/CaloCluster/data/calo_cluster_net_v2_stage1.norm.json"
# Radius-graph cut (mm).
rMax : 210.0
# Time-coincidence cut (ns).
dtMax : 25.0
# kNN fallback floor on node degree.
kMin : 3
# Per-source-node degree cap.
kMax : 20
}

# Cluster producer: runs the ONNX model on each graph and assembles
# clusters from the edge logits.
caloClusterMakerGNN : {
module_type : "CaloClusterMakerGNN"
caloHitGraphCollection : "caloHitGraphMakerGNN"
modelPath : "Offline/CaloCluster/data/calo_cluster_net_v2_stage1.onnx"
# The three expected* entries are presumably checked against the
# model's metadata -- confirm in CaloClusterMakerGNN.
expectedModelVersion : "calo-cluster-net-v2-stage1"
expectedNodeFeatures : ["log_e","t","x","y","r","e_rel"]
expectedEdgeFeatures : ["dx","dy","d","dt","dlog_e","asym_e","logsum_e","dr"]
# Edge-probability threshold (model-specific).
tauEdge : 0.20
# Energy (MeV) a hit needs in order to keep expanding the BFS.
bfsExpandCut : 10.0
# Clusters with fewer hits, or less total energy (MeV), are dropped.
minHits : 2
minEnergyMeV : 10.0
# Instance name of the emitted CaloClusterCollection.
outputInstance : "GNN"
}
}

analyzers : {

# Dumps the GNN cluster assignments to a flat TTree (see the file
# header); reads the hits and the "GNN" instance produced above.
parityDump : {
module_type : "CaloHitGraphParityDump"
caloHitCollection : "CaloHitMaker"
caloClusterCollection : "caloClusterMakerGNN:GNN"
}
}

# The graph producer must precede the cluster producer that consumes
# its output.
GnnPath : [ caloHitGraphMakerGNN, caloClusterMakerGNN ]
DumpPath: [ parityDump ]

trigger_paths : [ GnnPath ]
end_paths : [ DumpPath ]
}

# File name configured for TFileService; the Usage line above passes the
# same name via -T.
services.TFileService.fileName : "parity_dump.root"
64 changes: 64 additions & 0 deletions CaloCluster/fcl/prolog.fcl
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,68 @@ CaloCluster : { @table::CaloCluster
Reco : [ CaloProtoClusterMaker, CaloClusterMaker, CaloClusterFastMaker ]
}


# ---------------------------------------------------------------------
# GNN clustering (split design -- see calorimeter/GNN/docs/offline_integration.md).
#
# Two producers run alongside the existing BFS chain (CaloProtoClusterMaker
# + CaloClusterMaker), reading the same CaloHitCollection. The graph
# producer emits a transient CaloHitGraphCollection that the cluster
# producer consumes. The cluster producer's output ships under instance
# name "GNN" so downstream consumers select via (module_label, "GNN")
# and existing BFS-reading analyses keep working unchanged.
#
# Production FCLs that want both BFS and GNN clustering should append:
# physics.producers.caloHitGraphMakerGNN : @local::CaloClusterGNN.caloHitGraphMakerGNN
# physics.producers.caloClusterMakerGNN : @local::CaloClusterGNN.caloClusterMakerGNN
# physics.<reco-path> : [ ..., CaloHitMaker, CaloProtoClusterMaker, CaloClusterMaker,
# caloHitGraphMakerGNN, caloClusterMakerGNN ]
#
# Or use the bundled sequence:
# physics.<reco-path> : [ ..., @sequence::CaloClusterGNN.Reco ]
#
# Frozen recipe values (CCN+BFS10, calorimeter/GNN/docs/findings.md 7.4):
# tauEdge=0.20 bfsExpandCut=10.0 minHits=2 minEnergyMeV=10.0
#
# To swap in SimpleEdgeNet for an A/B comparison job, declare a second
# instance with modelPath/expectedModelVersion pointing at sen.onnx
# and tauEdge=0.26 (see offline_integration.md 2.2).

# Base parameter sets for the two GNN clustering modules, keyed by the
# module labels that production FCLs are expected to use.
CaloClusterGNN : {
# Graph producer: reads the CaloHitCollection and emits the transient
# CaloHitGraphCollection.
caloHitGraphMakerGNN :
{
module_type : CaloHitGraphMaker
caloHitCollection : CaloHitMaker
# JSON sidecar holding the per-feature mean/std used for z-score
# normalisation.
normSidecar : "Offline/CaloCluster/data/calo_cluster_net_v2_stage1.norm.json"
# Radius-graph cut (mm).
rMax : 210.0
# Time-coincidence cut (ns).
dtMax : 25.0
# kNN fallback floor on node degree.
kMin : 3
# Per-source-node degree cap.
kMax : 20
}

# Cluster producer: consumes the graphs, runs the ONNX model and
# assembles clusters from the edge logits.
caloClusterMakerGNN :
{
module_type : CaloClusterMakerGNN
# Must match the label under which the graph producer is declared.
caloHitGraphCollection : caloHitGraphMakerGNN
modelPath : "Offline/CaloCluster/data/calo_cluster_net_v2_stage1.onnx"
# The three expected* entries are presumably checked against the
# model's metadata -- confirm in CaloClusterMakerGNN.
expectedModelVersion : "calo-cluster-net-v2-stage1"
expectedNodeFeatures : ["log_e","t","x","y","r","e_rel"]
expectedEdgeFeatures : ["dx","dy","d","dt","dlog_e","asym_e","logsum_e","dr"]
# Edge-probability threshold (model-specific).
tauEdge : 0.20
# Energy (MeV) a hit needs in order to keep expanding the BFS.
bfsExpandCut : 10.0
# Clusters with fewer hits, or less total energy (MeV), are dropped.
minHits : 2
minEnergyMeV : 10.0
# Instance name of the emitted CaloClusterCollection, so it can be
# told apart from the BFS collection.
outputInstance : "GNN"
}
}

# Re-open CaloClusterGNN: @table:: carries over the two module tables
# defined above, then `producers` and `Reco` are added, mirroring the
# layout of the CaloCluster table earlier in this prolog.
CaloClusterGNN : { @table::CaloClusterGNN
# Copies of both module configurations, keyed by module label.
producers : {
caloHitGraphMakerGNN : { @table::CaloClusterGNN.caloHitGraphMakerGNN }
caloClusterMakerGNN : { @table::CaloClusterGNN.caloClusterMakerGNN }
}

# Path fragment: the graph producer comes first because the cluster
# producer consumes its CaloHitGraphCollection.
Reco : [ caloHitGraphMakerGNN, caloClusterMakerGNN ]
}

END_PROLOG
60 changes: 60 additions & 0 deletions CaloCluster/inc/GnnClusterAssembler.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#ifndef CaloCluster_GnnClusterAssembler_hh
#define CaloCluster_GnnClusterAssembler_hh
//
// C++ port of calorimeter/GNN/src/inference/cluster_reco.py for the
// CCN+BFS10 recipe (the winning configuration in
// docs/findings.md §7.4).
//
// Steps applied to the directed edge logits emitted by the ONNX model:
// 1. Sigmoid → per-edge probabilities.
// 2. Symmetrise: for each unordered pair {i, j}, take the mean of
// p_ij and p_ji.
// 3. Threshold at tauEdge.
// 4. BFS traversal seeded from highest-energy hits — hits with
// energy >= bfsExpandCut continue the BFS; lower-energy hits join
// but cannot recruit. Mirrors Offline's ClusterFinder ExpandCut.
// 5. Cleanup: drop clusters with fewer than minHits hits or total
// energy below minEnergyMeV.
// 6. Relabel to contiguous IDs.
//
// Returns labels[N] where labels[i] = cluster ID >= 0 or -1 (dropped).
//

#include <cstdint>
#include <vector>

namespace mu2e {

// Turns the directed edge logits emitted by the ONNX model into one
// cluster label per hit, following the steps listed at the top of this
// header. The cuts are fixed at construction time.
class GnnClusterAssembler {
public:
  // Cuts applied while assembling clusters.
  struct Config {
    double   tauEdge{0.20};       // edge-probability threshold (model-specific)
    double   bfsExpandCut{10.0};  // MeV; BFS-style ExpandCut
    unsigned minHits{2};          // clusters with fewer hits are dropped
    double   minEnergyMeV{10.0};  // clusters with less total energy are dropped
  };

  // Stores a copy of `cfg`; later changes to the caller's Config have
  // no effect on this assembler.
  explicit GnnClusterAssembler(const Config& cfg) : cfg_(cfg) {}

  // Arguments
  //   nNodes         - number of hits (graph nodes), N
  //   edgeIndex      - flat array of 2 * E int64 values: the E source
  //                    indices first, then the E destination indices
  //                    (same layout as CaloHitGraph)
  //   edgeLogits     - E pre-sigmoid logits from the ONNX model
  //   hitEnergiesMeV - N raw per-hit energies in MeV
  //
  // Result
  //   A vector of N labels. labels[i] >= 0 is the cluster ID of hit i;
  //   labels[i] == -1 marks a hit left unclustered after the
  //   minHits / minEnergyMeV cleanup.
  std::vector<int> assemble(int nNodes,
                            const std::vector<int64_t>& edgeIndex,
                            const std::vector<float>& edgeLogits,
                            const std::vector<float>& hitEnergiesMeV) const;

private:
  Config cfg_;  // cuts captured by the constructor
};

}

#endif
84 changes: 84 additions & 0 deletions CaloCluster/inc/GnnGraphBuilder.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#ifndef CaloCluster_GnnGraphBuilder_hh
#define CaloCluster_GnnGraphBuilder_hh
//
// C++ port of calorimeter/GNN/src/data/graph_builder.py.
//
// Builds one CaloHitGraph per calorimeter disk per event:
// 1. Collect CaloHits per disk; look up (x, y) in the disk-local
// frame from the Calorimeter geometry service.
// 2. Brute-force pairwise distance loop with r_max cut for the
// radius graph (faithful to scipy.spatial.cKDTree.query_pairs).
// 3. Time filter |dt| < dt_max ns.
// 4. kNN fallback for nodes with degree < k_min after the radius+time pass.
// 5. Per-source-node degree cap at k_max (keep the k_max nearest dsts).
// 6. Compute 6 node features and 8 edge features.
// 7. Z-score normalise using the train-split stats from the JSON
// sidecar passed at construction (loaded via loadStatsFromJson).
//
// Feature column order is canonical and matches the model's
// metadata_props (see calorimeter/GNN/docs/onnx_deployment.md):
//
// nodes : log_e, t, x, y, r, e_rel
// edges : dx, dy, d, dt, dlog_e, asym_e, logsum_e, dr
//

#include "Offline/CalorimeterGeom/inc/Calorimeter.hh"
#include "Offline/RecoDataProducts/inc/CaloHit.hh"
#include "Offline/RecoDataProducts/inc/CaloHitGraph.hh"

#include "canvas/Persistency/Common/Ptr.h"

#include <string>
#include <vector>

namespace mu2e {

// Builds the normalised per-disk CaloHitGraph described at the top of
// this header. Graph cuts and normalisation statistics are fixed at
// construction time.
class GnnGraphBuilder {
public:
  // Graph-construction cuts.
  struct Config {
    double   rMax{210.0};  // mm; radius-graph cut
    double   dtMax{25.0};  // ns; time-coincidence cut
    unsigned kMin{3};      // kNN fallback floor
    unsigned kMax{20};     // per-source-node degree cap
  };

  // Z-score statistics, one entry per feature column:
  // normalised = (x - mean) / std.
  struct Stats {
    std::vector<float> nodeMean;  // 6 entries
    std::vector<float> nodeStd;   // 6 entries
    std::vector<float> edgeMean;  // 8 entries
    std::vector<float> edgeStd;   // 8 entries
  };

  // Keeps private copies of both arguments.
  GnnGraphBuilder(const Config& cfg, const Stats& stats)
    : cfg_(cfg), stats_(stats) {}

  // Reads Stats from the JSON sidecar written by
  // calorimeter/GNN/scripts/export_norm_stats.py.
  // Throws cet::exception when a key is missing, an array has the wrong
  // size, or the node/edge feature names differ from the canonical
  // ones.
  static Stats loadStatsFromJson(const std::string& jsonPath);

  // Fills `out` with the graph for a single disk.
  //   diskID - disk the emitted graph belongs to
  //   hits   - the CaloHits on that disk
  //   ptrs   - art::Ptr to each hit, index-aligned with `hits`
  //   cal    - geometry handle used for crystal positions
  //   out    - cleared first, then populated in place
  void buildGraph(int diskID,
                  const std::vector<const CaloHit*>& hits,
                  const std::vector<art::Ptr<CaloHit>>& ptrs,
                  const Calorimeter& cal,
                  CaloHitGraph& out) const;

private:
  Config cfg_;   // cuts captured by the constructor
  Stats stats_;  // normalisation statistics captured by the constructor
};

}

#endif
Loading