From bbe08a48d98a5380876853797b5561dda9a6e945 Mon Sep 17 00:00:00 2001 From: Yongyi Wu Date: Sat, 11 Apr 2026 17:39:38 -0500 Subject: [PATCH 1/4] separated PR (Diffusion Model resampler for STM) from PR #1794 --- MCDataProducts/inc/GenId.hh | 6 +- MachineLearningTools/CMakeLists.txt | 11 + .../inc/ScoreBasedDiffusionModel.hh | 321 +++++ MachineLearningTools/src/SConscript | 20 + .../src/ScoreBasedDiffusionModel.cc | 1048 +++++++++++++++++ STMMC/CMakeLists.txt | 39 + STMMC/inc/VDResamplerTransformDefaults.hh | 13 + STMMC/src/SConscript | 2 + STMMC/src/VDResamplerConfigure_module.cc | 271 +++++ .../VDResamplerGenerateFromModel_module.cc | 342 ++++++ STMMC/src/VDResamplerGenerateMix_module.cc | 500 ++++++++ STMMC/src/VDResamplerTrain_module.cc | 397 +++++++ 12 files changed, 2967 insertions(+), 3 deletions(-) create mode 100644 MachineLearningTools/CMakeLists.txt create mode 100644 MachineLearningTools/inc/ScoreBasedDiffusionModel.hh create mode 100644 MachineLearningTools/src/SConscript create mode 100644 MachineLearningTools/src/ScoreBasedDiffusionModel.cc create mode 100644 STMMC/inc/VDResamplerTransformDefaults.hh create mode 100644 STMMC/src/VDResamplerConfigure_module.cc create mode 100644 STMMC/src/VDResamplerGenerateFromModel_module.cc create mode 100644 STMMC/src/VDResamplerGenerateMix_module.cc create mode 100644 STMMC/src/VDResamplerTrain_module.cc diff --git a/MCDataProducts/inc/GenId.hh b/MCDataProducts/inc/GenId.hh index 1c781150a6..d7feb7f715 100644 --- a/MCDataProducts/inc/GenId.hh +++ b/MCDataProducts/inc/GenId.hh @@ -44,8 +44,8 @@ namespace mu2e { cosmicCRY, pbarFlat, fromAscii, ExternalRMC, InternalRMC, CeLeadingLog, cosmicCORSIKA, //44 MuCapProtonGenTool, MuCapDeuteronGenTool, DIOGenTool, MuCapNeutronGenTool, // 48 MuCapPhotonGenTool, MuCapGammaRayGenTool, CeLeadingLogGenTool, MuplusMichelGenTool,// 52 - gammaPairProduction, antiproton, Mu2eXGenTool,//55 - lastEnum //56 + gammaPairProduction, antiproton, Mu2eXGenTool, STMDownStreamGenTool,//56 + lastEnum //57 }; #ifndef SWIG @@ -64,7 +64,7 @@ namespace mu2e { "CosmicCRY", "pbarFlat","fromAscii","ExternalRMC","InternalRMC","CeLeadingLog", "CosmicCORSIKA", \ "MuCapProtonGenTool", "MuCapDeuteronGenTool", "DIOGenTool", "MuCapNeutronGenTool", \ "MuCapPhotonGenTool", "MuCapGammaRayGenTool","CeLeadingLogGenTool","MuplusMichelGenTool", \ - "gammaPairProduction", "antiproton", "Mu2eXGenTool" + "gammaPairProduction", "antiproton", "Mu2eXGenTool", "STMDownStreamGenTool" #endif public: diff --git a/MachineLearningTools/CMakeLists.txt b/MachineLearningTools/CMakeLists.txt new file mode 100644 index 0000000000..b837296aff --- /dev/null +++ b/MachineLearningTools/CMakeLists.txt @@ -0,0 +1,11 @@ +cet_make_library( + SOURCE + src/ScoreBasedDiffusionModel.cc + LIBRARIES PUBLIC + CLHEP::CLHEP + messagefacility::messagefacility + cetlib_except::cetlib_except +) + +install_source(SUBDIRS src) +install_headers(USE_PROJECT_NAME SUBDIRS inc) diff --git a/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh b/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh new file mode 100644 index 0000000000..2bf4c7c697 --- /dev/null +++ b/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh @@ -0,0 +1,321 @@ +// Module for training and using score-based diffusion model +// Added by Yongyi Wu +// Mar. 2026 + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "CLHEP/Random/RandomEngine.h" +#include "CLHEP/Random/RandFlat.h" +#include "CLHEP/Random/RandGaussQ.h" + +#include "messagefacility/MessageLogger/MessageLogger.h" +#include "cetlib_except/exception.h" + +namespace mu2e{ + struct DiffusionTrainingSample { + std::vector x; // transformed state vector to diffuse (size = dim) + std::vector cond; // optional conditioning vector (size = conditionDim) + }; + + class ScoreBasedDiffusionModel { + public: + // Enumeration for optimizer selection + enum class OptimizerType { + SGD, // Stochastic Gradient Descent + ADAM // Adam optimizer + }; + + // Enumeration for noise schedule selection + enum class NoiseScheduleType { + LINEAR, // Linear noise schedule + COSINE // Cosine noise schedule + }; + + // Constructor: Initialize diffusion model with CLHEP random distributions. + // + // Parameters: + // randFlat - Reference to CLHEP RandFlat for uniform sampling (externally managed) + // randGaussQ - Reference to CLHEP RandGaussQ for Gaussian noise (externally managed) + // dim - Dimensionality of the state space + // conditionDim - Dimensionality of the optional conditioning vector (default: 0 for unconditional model) + // hidden - Size of hidden layers in the neural network + // layers - Number of layers in the network + // optimizerType - Type of optimizer to use (SGD or ADAM, default: ADAM) + // adamBeta1 - Adam optimizer beta1 parameter (default: 0.9) + // adamBeta2 - Adam optimizer beta2 parameter (default: 0.999) + // adamEps - Adam optimizer epsilon parameter (default: 1e-8) + // scheduleType - Type of noise schedule (LINEAR or COSINE, default: COSINE) + // betaMin - Minimum noise schedule parameter (for LINEAR schedule, default: 1e-4) + // betaMax - Maximum noise schedule parameter (for LINEAR schedule, default: 0.02) + // cosineOffset - Offset parameter (for cosine schedule, default: 0.008) + // batchSize - Batch size for training (default: 32) + // gradientClipThreshold - Threshold for gradient clipping (default: 1.0) + // learningRate - Learning rate for training (default: 1e-3) + // diffusionSteps - Number of steps in the diffusion process (default: 200) + ScoreBasedDiffusionModel( + // Network architecture parameters + CLHEP::RandFlat& randFlat, + CLHEP::RandGaussQ& randGaussQ, + int dim, + int conditionDim, + int hidden, + int layers, + // Optimizer configuration + OptimizerType optimizerType = OptimizerType::ADAM, + double adamBeta1 = 0.9, + double adamBeta2 = 0.999, + double adamEps = 1e-8, + // Noise schedule configuration + NoiseScheduleType scheduleType = NoiseScheduleType::COSINE, + double betaMin = 1e-4, + double betaMax = 0.02, + double cosineOffset = 0.008, + // Training configuration + int batchSize = 32, + double gradientClipThreshold = 1.0, + double learningRate = 1e-3, + // Diffusion process configuration + int diffusionSteps = 200 + ); + + // Train the score network on a batch of samples. + // Uses random sampling and noise injection via the external engine. + // Note that training needs to occur on all data samples. Training on multiple small subsets + // of data and then averaging or aggregating the model parameters is not supported and may lead to + // issues as neural networks are not linear. + // + // Parameters: + // data - Training samples (transformed state vectors) + // epochs - Number of training epochs to perform + void train( + const std::vector& data, + int epochs + ); + + // Generate a new sample from the diffusion model via reverse process. + // Uses the external random engine for noise generation during sampling. + // + // Parameters: + // condition - Optional conditioning vector (must match conditionDim_ when enabled) + // useHeun - If true, uses Heun's method (2nd order, default). If false, uses Euler's method (1st order) + // diffusionSteps - Number of diffusion steps for sampling (default: -1 uses the model's configured diffusionSteps_) + // + // Returns: A generated sample vector of dimension dim_ + std::vector generateSample( + const std::vector& condition = {}, + bool useHeun = true, + int diffusionSteps = -1 + ); + + // Save the model parameters to a CSV file with annotations for later use. + // Uses a default filename of "DiffusionModel.csv" if not specified. + // + // Parameters: + // filename - Path to the CSV file where model parameters will be saved (default: "DiffusionModel.csv") + void saveModel(const std::string& filename = "DiffusionModel.csv"); + + // Load model parameters from a file to restore a previously trained model. + // Note that as the adam optimizer state is not saved, it is not possible to resume training from a loaded + // model and pick up the training process where it left off. The loaded model can only be used for sampling. + // + // Parameters: + // randFlat / RandGaussQ - CLHEP random number generator wrappers being passed + // filename - Path to the file from which model parameters will be loaded + static ScoreBasedDiffusionModel loadModel( + CLHEP::RandFlat& randFlat, + CLHEP::RandGaussQ& randGaussQ, + const std::string& filename + ); + + private: + + // ----- network ----- + + // Represents a single fully-connected layer with weights and biases. + struct Layer { + std::vector> W; // Weight matrix [output_size][input_size] + std::vector b; // Bias vector [output_size] + + // storage for gradients during back-propagation + std::vector> gradW; // gradient of loss w.r.t. weights + std::vector gradb; // gradient of loss w.r.t. biases + + // Adam optimizer state buffers + std::vector> mW; // First moment estimates for weights, m_t = beta1 m_{t-1} + (1-beta1) g_t + std::vector> vW; // Second moment estimates for weights, v_t = beta2 v_{t-1} + (1-beta2) g_t^2 + + std::vector mb; // First moment estimates for biases + std::vector vb; // Second moment estimates for biases + }; + + std::vector network_; // Network layers in forward order + + // Storage for activations and pre-activations during forward pass (used in back-propagation) + // During forward pass, preactivations_[l] = W[l] * activations_[l-1] + b[l], + // and activations_[l] = activationFunction(preactivations_[l]) + // These are needed to compute gradients during the backward pass. + std::vector> activations_; + std::vector> preactivations_; + + // Forward pass through the network. + // Computes the score function. + // input actually contains the state vector, optional condition vector, and the time embedding. + // dimension hence is dim_ + conditionDim_ + 1. + // + // Parameters: + // x - Input vector of dimension dim_ + conditionDim_ + 1 + // + // Returns: Output vector (dimension depends on network architecture) + std::vector forward( + const std::vector& x + ); + + // Backward pass for gradient computation. + // Computes gradients w.r.t. network parameters via chain rule. + // + // Parameters: + // gradOutput - Gradient of loss w.r.t. network output + // + // Returns: Gradient of loss w.r.t. network parameters (gradW and gradb for each layer) + void backward( + const std::vector& gradOutput + ); + + // Update network weights using computed gradients (Stochastic Gradient Descent SGD). + // Applied after backward pass. Alternately, an Adam optimizer step can be implemented + // in adamUpdate() for better convergence. + // + // Parameters: + // lr - Learning rate for gradient descent step + void updateWeights(double lr); + + // Update network weights using Adam optimization algorithm. + // This method would implement the Adam update rule using the stored gradients and moment estimates. + // + // Parameters: + // lr - Learning rate for Adam update + void adamUpdate(double lr); + + // ----- diffusion ----- + + // Noise schedule parameter beta(t) over diffusion time [0,1]. + // Linear interpolation between betaMin_ and betaMax_. + // + // Parameters: + // t - Diffusion time parameter in [0,1] + // + // Returns: Beta value for the given time step + double beta(double t) const; + + // Standard deviation of noise at diffusion time t. + // Related to the noise schedule via sigma(t) = sqrt(1 - exp(-integral(beta(s) ds))). + // + // Parameters: + // t - Diffusion time parameter in [0,1] + // + // Returns: Noise standard deviation at time t + double sigma(double t) const; + + // Add Gaussian noise to a state vector at diffusion time t. + // Uses external engine (randGaussQ_) for reproducible noise generation. + // Noise sample is stored in eps for later use in training. + // + // Parameters: + // x - Original state vector + // t - Diffusion time parameter in [0,1] + // eps - Output: Gaussian noise vector used for perturbation (size = dim_) + // + // Returns: Noisy state = x + sigma(t) * eps + std::vector addNoise( + const std::vector& x, + double t, + std::vector& eps + ); + + // ----- loss ----- + + // Compute Mean Squared Error between predicted score and target score. + // Used during training to measure model performance. + // + // Parameters: + // score - Model's predicted score vector + // target - Ground truth target score vector + // + // Returns: MSE loss value (scalar) + double computeLoss( + const std::vector& score, + const std::vector& target + ) const; + + // Clip gradients to prevent exploding gradients during training. + // + // Parameters: + // maxNorm - Maximum allowed norm for the gradients. If the total norm exceeds this threshold, + // gradients are scaled down to have norm equal to maxNorm. + void clipGradients(double maxNorm); + + // ----- internal vars ----- + + // The random engine is NOT owned by this class. It is injected externally + // by the framework. Below are CLHEP distribution wrappers for actual random number generation. + // These wrap the engine_ and provide specific probability distributions. + // - RandFlat: Uniform distribution on [0,1) + // - RandGaussQ: Gaussian (normal) distribution with mean=0, sigma=1 (or custom) + // Both are bound in the constructor to externally managed wrappers. + CLHEP::RandFlat& randFlat_; // Used for uniform sampling (e.g., batch selection) + CLHEP::RandGaussQ& randGaussQ_; // Used for Gaussian noise in diffusion process + + // Model hyperparameters + int dim_; // Dimensionality of state space + int conditionDim_; // Dimensionality of the optional conditioning vector + int hidden_; // Hidden layer size + int layers_; // Number of network layers + + // Optimizer configuration + OptimizerType optimizerType_; // Type of optimizer to use (SGD or ADAM) + + // Adam optimizer parameters + double adamBeta1_; // First moment exponential decay rate (default: 0.9) + double adamBeta2_; // Second moment exponential decay rate (default: 0.999) + double adamEps_; // Small constant for numerical stability (default: 1e-8) + + // Noise schedule configuration + NoiseScheduleType noiseScheduleType_; // Type of noise schedule (LINEAR or COSINE) + + // Linear noise schedule parameters (beta(t) = betaMin + t*(betaMax - betaMin)) + // default betaMin = 1e-4, betaMax = 0.02 are typical values used in diffusion models, but can be tuned for specific applications. + double betaMin_; // Beta value at t=0 + double betaMax_; // Beta value at t=1 + + // Cosine noise schedule parameter (offset to avoid singularity at t=0 in cosine schedule, default: 0.008) + double cosineOffset_; + + // Training configuration + int batchSize_; // Batch size for vectorized training (default: 32) + double gradientClipThreshold_; // Gradient clipping threshold (default: 1.0) + double learningRate_; // Learning rate for training (default: 1e-3) + + // Diffusion process discretization + int diffusionSteps_; // Number of time steps to generate a sample (default: 200) + + // Training state + double runningLoss_; // Accumulated loss for monitoring during training + int adamStep_; // Step counter for Adam optimizer (used to compute bias-corrected moment estimates) + int trainingSampleSize_; // Total number of training samples + + // Container for tracking training loss over epochs + std::vector epochLosses_; + + }; +} diff --git a/MachineLearningTools/src/SConscript b/MachineLearningTools/src/SConscript new file mode 100644 index 0000000000..f74c4ad579 --- /dev/null +++ b/MachineLearningTools/src/SConscript @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# +# Script to build the files found in this directory. +# + +import os +Import('env') +Import('mu2e_helper') + +helper=mu2e_helper(env) + +mainlib = helper.make_mainlib ( [ 'CLHEP', + 'cetlib', + 'cetlib_except', + 'MF_MessageLogger' ] ) + +# This tells emacs to view this file in python mode. +# Local Variables: +# mode:python +# End: diff --git a/MachineLearningTools/src/ScoreBasedDiffusionModel.cc b/MachineLearningTools/src/ScoreBasedDiffusionModel.cc new file mode 100644 index 0000000000..5cf8b2dc34 --- /dev/null +++ b/MachineLearningTools/src/ScoreBasedDiffusionModel.cc @@ -0,0 +1,1048 @@ +#include "Offline/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh" + +namespace mu2e { + + // SiLU activation function (Sigmoid Linear Unit) + // Defined as: silu(x) = x / (1 + exp(-x)) + static double silu(double x) { + return x / (1.0 + std::exp(-x)); + } + + // Derivative of SiLU activation function, needed for back-propagation. + // Defined as: silu'(x) = sigmoid(x) * (1 + x * (1 - sigmoid(x))) + static double siluDeriv(double x) { + double s = 1.0 / (1.0 + std::exp(-x)); + return s * (1.0 + x * (1.0 - s)); + } + + // Linear interpolation of beta(t) between betaMin_ and betaMax_. + // This is only used in the linear noise schedule. + double ScoreBasedDiffusionModel::beta(double t) const { + return betaMin_ + t * (betaMax_ - betaMin_); + } + + // Standard deviation of noise at diffusion time t. + // For the linear schedule, use simply sqrt(beta(t)). + // For the cosine schedule, this is derived from the cumulative noise schedule. + double ScoreBasedDiffusionModel::sigma(double t) const { + if (noiseScheduleType_ == NoiseScheduleType::COSINE) { + // Cosine noise schedule + double f = (t + cosineOffset_) / (1.0 + cosineOffset_); + double alpha_bar = std::cos(f * M_PI * 0.5); + alpha_bar *= alpha_bar; + return std::sqrt(1.0 - alpha_bar); + } else { + // Linear noise schedule (default) + return std::sqrt(beta(t)); + } + } + + + ScoreBasedDiffusionModel::ScoreBasedDiffusionModel( + CLHEP::RandFlat& randFlat, + CLHEP::RandGaussQ& randGaussQ, + int dim, + int conditionDim, + int hidden, + int layers, + OptimizerType optimizerType, + double adamBeta1, + double adamBeta2, + double adamEps, + NoiseScheduleType scheduleType, + double betaMin, + double betaMax, + double cosineOffset, + int batchSize, + double gradientClipThreshold, + double learningRate, + int diffusionSteps + ) : randFlat_(randFlat), randGaussQ_(randGaussQ), + dim_(dim), conditionDim_(conditionDim), hidden_(hidden), layers_(layers), + optimizerType_(optimizerType), + adamBeta1_(adamBeta1), adamBeta2_(adamBeta2), adamEps_(adamEps), + noiseScheduleType_(scheduleType), + betaMin_(betaMin), betaMax_(betaMax), cosineOffset_(cosineOffset), + batchSize_(batchSize), gradientClipThreshold_(gradientClipThreshold), learningRate_(learningRate), + diffusionSteps_(diffusionSteps), + runningLoss_(0.0), adamStep_(0), trainingSampleSize_(0), epochLosses_() { + + // Validate model dimensions and parameters + if (dim <= 0 || conditionDim < 0 || hidden <= 0 || layers <= 0) { + throw cet::exception("ScoreBasedDiffusionModel::initialization") << "Invalid model dimensions"; + } + if (batchSize <= 0) { + throw cet::exception("ScoreBasedDiffusionModel::initialization") << "Invalid batchSize"; + } + if (diffusionSteps <= 0) { + throw cet::exception("ScoreBasedDiffusionModel::initialization") << "Invalid diffusionSteps"; + } + + // ------------------------------------------------------------ + // Network architecture + // + // Input dimension = dim_ + conditionDim_ + 1 + // (+1 because diffusion time t is appended to the input vector) + // ------------------------------------------------------------ + + int inputSize = dim_ + conditionDim_ + 1; + int in = inputSize; + + // Weight initialization scale (local constant so it can be tuned easily) + // const double weightInitScale = 0.02; // not scalable with size, can lead to instability for larger models and too slow training for smaller models + // const double weightInitScale = std::sqrt(2.0 / in); // He initialization for ReLU activations, SiLU is similar to ReLU in terms of variance preservation + double reducedHe = 0.5 * std::sqrt(2.0 / in); // Scaled He initialization found to improve stability + const double weightInitScale = std::min(reducedHe, 0.3); // Cap the weight initialization scale to prevent instability for very small input sizes + + for (int l = 0; l < layers_; ++l) { + + // Last layer outputs the score vector (dimension = dim_) + int out = (l == layers_ - 1) ? dim_ : hidden_; + + Layer layer; + + // Allocate weights + layer.W.resize(out, std::vector(in)); + + // Allocate biases + layer.b.resize(out); + + // Allocate gradient buffers + layer.gradW.resize(out, std::vector(in, 0.0)); + layer.gradb.resize(out, 0.0); + + // Allocate Adam buffers + layer.mW.resize(out, std::vector(in, 0.0)); + layer.vW.resize(out, std::vector(in, 0.0)); + + layer.mb.resize(out, 0.0); + layer.vb.resize(out, 0.0); + + // -------------------------------------------------------- + // Weight initialization + // + // Small Gaussian initialization improves training stability + // -------------------------------------------------------- + + for (int i = 0; i < out; ++i) { + for (int j = 0; j < in; ++j) { + layer.W[i][j] = weightInitScale * randGaussQ_.fire(); + // this generates a Gaussian random number with mean=0 and sigma=weightInitScale + } + layer.b[i] = 0.0; + } + + if (layer.W.empty() || layer.W[0].size() != static_cast(in) || //Check that weight matrix has correct input dimension + layer.W.size() != static_cast(out) || //Check that weight matrix has correct output dimension + layer.b.size() != static_cast(out)) { //Check that bias vector has correct dimension + throw cet::exception("ScoreBasedDiffusionModel::initialization") + << "Layer shape initialization mismatch"; + } + + network_.push_back(layer); + + // Output becomes next layer input + in = out; + } + + // Print layer and model configuration + std::ostringstream oss; + oss << "ScoreBasedDiffusionModel initialized\n" + << "Model configuration:\n" + << " Network architecture:\n" + << " | dim=" << dim_ << "\n" + << " | conditionDim=" << conditionDim_ << "\n" + << " | hidden=" << hidden_ << "\n" + << " | layers=" << layers_ << "\n" + << " Optimizer configuration:\n" + << " | Optimizer=" << (optimizerType_ == OptimizerType::ADAM ? "Adam" : "SGD") << "\n"; + if (optimizerType_ == OptimizerType::ADAM) { + oss << " |- AdamBeta1=" << adamBeta1_ << "\n" + << " |- AdamBeta2=" << adamBeta2_ << "\n" + << " |- AdamEps=" << adamEps_ << "\n"; + } + oss << " Noise schedule configuration:\n" + << " | NoiseSchedule=" << (noiseScheduleType_ == NoiseScheduleType::COSINE ? "Cosine" : "Linear") << "\n"; + if (noiseScheduleType_ == NoiseScheduleType::COSINE) { + oss << " |- CosineOffset=" << cosineOffset_ << "\n"; + } else { + oss << " |- BetaMin=" << betaMin_ << "\n" + << " |- BetaMax=" << betaMax_ << "\n"; + } + oss << " Training configuration:\n" + << " | BatchSize=" << batchSize_ << "\n" + << " | GradientClipThreshold=" << gradientClipThreshold_ << "\n" + << " | LearningRate=" << learningRate_ << "\n" + << " Diffusion process configuration:\n" + << " | DiffusionSteps=" << diffusionSteps_ << "\n"; + mf::LogInfo("ScoreBasedDiffusionModel::initialize") << oss.str(); + + // Reserve space for forward-pass caches + activations_.reserve(layers_ + 1); + preactivations_.reserve(layers_); + } + + // forward pass to compute the score function given input (state + time embedding) + std::vector ScoreBasedDiffusionModel::forward( + const std::vector& input + ) + { + if (network_.empty() || network_[0].W.empty() || network_[0].W[0].empty()) { + throw cet::exception("ScoreBasedDiffusionModel::forward") + << "Network is not properly initialized"; + } + if (input.size() != network_[0].W[0].size()) { + throw cet::exception("ScoreBasedDiffusionModel::forward") + << "Input dimension mismatch: got " << input.size() + << ", expected " << network_[0].W[0].size(); + } + + activations_.clear(); + preactivations_.clear(); + + // Input vector contains both the state and the time embedding (e.g., concatenated together). + std::vector x = input; + + // Cache the input as the activation of layer 0 for use in back-propagation. + activations_.push_back(x); + + // Forward pass through the network layers + for (size_t l = 0; l < network_.size(); ++l) + { + // Compute pre-activation z = W*x + b for the current layer + auto& layer = network_[l]; + std::vector z(layer.W.size()); + for (size_t i = 0; i < layer.W.size(); ++i) + { + double v = layer.b[i]; + for (size_t j = 0; j < layer.W[i].size(); ++j) + { + v += layer.W[i][j]*x[j]; + } + z[i] = v; + } + + // Cache pre-activation for back-propagation + preactivations_.push_back(z); + + // Apply activation function (SiLU) to get the output of the current layer. + std::vector y(z.size()); + if(l != network_.size()-1) // No activation on the last layer (output layer), as it predicts the score directly. + { + for (size_t i=0;i& gradOutput + ) + { + std::vector grad = gradOutput; + + // Back-propagate through layers in reverse order + for (int l = static_cast(network_.size()) - 1; l >= 0; l--) + { + auto& layer = network_[l]; + + auto& aPrev = activations_[l]; + auto& z = preactivations_[l]; + + std::vector gradZ(grad.size()); + + // For the output layer, the gradient is directly from the loss w.r.t. output (no activation function). + if(l == static_cast(network_.size())-1) + { + gradZ = grad; + } + // For hidden layers, apply the derivative of the activation function (SiLU) to the gradient. + else + { + for (size_t i=0;i gradPrev(layer.W[0].size(),0.0); + + for (size_t j=0;j ScoreBasedDiffusionModel::addNoise( + const std::vector& x, + double t, // Diffusion time parameter in [0,1] + std::vector& eps + ){ + if (x.size() != static_cast(dim_)) { + throw cet::exception("ScoreBasedDiffusionModel::addNoise") + << "Input x dimension mismatch: got " << x.size() + << ", expected " << dim_; + } + + double s = sigma(t); + + // Generate Gaussian noise vector eps of dimension dim_ using the external random engine. + eps.resize(dim_); + + std::vector xt(dim_); + + for (int i = 0; i < dim_; ++i) { + eps[i] = randGaussQ_.fire(); // Gaussian N(0,1) + xt[i] = x[i] + s * eps[i]; + } + + return xt; + } + + // Compute Mean Squared Error per dimension between predicted score and target score. + double ScoreBasedDiffusionModel::computeLoss( + const std::vector& score, + const std::vector& target + ) const{ + + double loss = 0.0; + + for (int i=0;i& data, + int epochs + ) + { + // Check that the network is properly initialized before training. + if (network_.empty() || network_[0].W.empty() || network_[0].W[0].empty()) { + throw cet::exception("ScoreBasedDiffusionModel::train") + << "Network is not properly initialized"; + } + + const double eps_safe = 1e-12; + const size_t N = data.size(); + + trainingSampleSize_ = N; // Store the total number of training samples + + if (N <= 1) + { + throw cet::exception("ScoreBasedDiffusionModel::train") + << "Insufficient data samples for training (samples must be > 1)"; + } + + for (size_t i = 0; i < N; ++i) { + if (data[i].x.size() != static_cast(dim_)) { + throw cet::exception("ScoreBasedDiffusionModel::train") + << "Training sample " << i << " has x dimension " << data[i].x.size() + << ", expected " << dim_; + } + if (data[i].cond.size() != static_cast(conditionDim_)) { + throw cet::exception("ScoreBasedDiffusionModel::train") + << "Training sample " << i << " has conditioning dimension " << data[i].cond.size() + << ", expected " << conditionDim_; + } + } + + // Create index vector once + std::vector indices(N); + for (size_t i = 0; i < N; ++i) + indices[i] = i; + + // Data shuffling and batching is performed at the epoch level to ensure that each epoch sees the data + // in a different order, which can improve training convergence. + for (int e = 0; e < epochs; ++e) + { + // Shuffle indices using Fisher–Yates and RandFlat for reproducibility + for (size_t i = N - 1; i > 0; --i) + { + size_t j = static_cast(randFlat_.fire() * (i + 1)); + std::swap(indices[i], indices[j]); + } + + // Counter for number of samples processed in the epoch (used for averaging loss). Avoid using N directly to + // allow for early stopping or partial epoch processing if needed. + int n = 0; + + int batchCounter = 0; + double epochLoss = 0.0; + + // iterate over the shuffled data samples + for (size_t idx = 0; idx < N; ++idx) + { + const auto& sample = data[indices[idx]]; + + // Sample diffusion time + double t = randFlat_.fire(); + // container for noise vector eps + std::vector eps; + + auto xt = addNoise(sample.x,t,eps); + double s = sigma(t); + + // The target score is the negative of the noise scaled by the noise standard deviation, i.e., -eps/sigma(t). + std::vector target(dim_); + for (int i=0;i input = xt; + input.insert(input.end(), sample.cond.begin(), sample.cond.end()); + input.push_back(t); + + // Check that input dimension matches expected dimension (dim_ + conditionDim_ + 1) + if (input.size() != network_[0].W[0].size()) { + throw cet::exception("ScoreBasedDiffusionModel::train") + << "Training input dimension mismatch: got " << input.size() + << ", expected " << network_[0].W[0].size(); + } + + // Forward pass to compute the predicted score from the network given the input (noisy sample + time). + auto score = forward(input); + + // Compute the loss (Mean Squared Error) between the predicted score and the target score. + std::vector grad(dim_); + double loss=0.0; + for (int i=0;i(batchCounter); + for (auto& layer : network_) { + for (auto& row : layer.gradW) + for (auto& g : row) + g *= invBatch; + for (auto& g : layer.gradb) + g *= invBatch; + } + + clipGradients(gradientClipThreshold_); // Clip gradients to prevent exploding gradients + + // Apply optimizer based on configuration + if (optimizerType_ == OptimizerType::ADAM) { + adamUpdate(learningRate_); + } else { + updateWeights(learningRate_); + } + batchCounter = 0; + } + + epochLoss += loss; // Accumulate loss for monitoring. + n++; + } + + // Final flush: apply optimizer to remaining gradients + if(batchCounter > 0) + { + // Average by the true final mini-batch size (which may be < batchSize_). + const double invBatch = 1.0 / static_cast(batchCounter); + for (auto& layer : network_) { + for (auto& row : layer.gradW) + for (auto& g : row) + g *= invBatch; + for (auto& g : layer.gradb) + g *= invBatch; + } + + clipGradients(gradientClipThreshold_); // Clip gradients before final update + + if (optimizerType_ == OptimizerType::ADAM) { + adamUpdate(learningRate_); + } else { + updateWeights(learningRate_); + } + } + + epochLoss /= n; + mf::LogInfo("ScoreBasedDiffusionModel::train") << "Epoch " << e << " Loss=" << epochLoss; + epochLosses_.push_back(epochLoss); + } + } + + void ScoreBasedDiffusionModel::saveModel( + const std::string& filename + ) + { + std::ofstream out(filename); + + if (!out) { + throw cet::exception("ScoreBasedDiffusionModel::saveModel") + << "Cannot open file " << filename; + } + + // Write CSV header and model configuration parameters with annotations + out << "[MODEL_PARAMETERS]\n"; + out << "Parameter,Value\n"; + // Model architecture + out << "dim," << dim_ << "\n"; + out << "conditionDim," << conditionDim_ << "\n"; + out << "hidden," << hidden_ << "\n"; + out << "layers," << layers_ << "\n"; + // Optimizer configuration + out << "optimizerType," << (optimizerType_ == OptimizerType::ADAM ? "ADAM" : "SGD") << "\n"; + out << "adamBeta1," << adamBeta1_ << "\n"; + out << "adamBeta2," << adamBeta2_ << "\n"; + out << "adamEps," << adamEps_ << "\n"; + // Noise schedule configuration + out << "noiseScheduleType," << (noiseScheduleType_ == NoiseScheduleType::COSINE ? "COSINE" : "LINEAR") << "\n"; + out << "betaMin," << betaMin_ << "\n"; + out << "betaMax," << betaMax_ << "\n"; + out << "cosineOffset," << cosineOffset_ << "\n"; + // Training configuration + out << "batchSize," << batchSize_ << "\n"; + out << "gradientClipThreshold," << gradientClipThreshold_ << "\n"; + out << "learningRate," << learningRate_ << "\n"; + // Diffusion process configuration + out << "diffusionSteps," << diffusionSteps_ << "\n"; + + // Write network architecture header + out << "\n[NETWORK_PARAMETERS]\n"; + out << "numLayers," << network_.size() << "\n"; + out << std::fixed << std::setprecision(17); // Use fixed-point notation with high precision for weights and biases + + for (size_t layerIdx = 0; layerIdx < network_.size(); ++layerIdx) { + // Write layer dimensions + auto& layer = network_[layerIdx]; + size_t outSize = layer.W.size(); + size_t inSize = layer.W[0].size(); + out << "\nLayer" << layerIdx << "_OutSize," << outSize << "\n"; + out << "Layer" << layerIdx << "_InSize," << inSize << "\n"; + + // Write weights in matrix format + out << "Layer" << layerIdx << "_Weights\n"; + for (const auto& row : layer.W) { + for (size_t j = 0; j < row.size(); ++j) { + out << row[j]; + if (j < row.size() - 1) out << ","; + } + out << "\n"; + } + + // Write biases + out << "Layer" << layerIdx << "_Biases\n"; + for (size_t j = 0; j < layer.b.size(); ++j) { + out << layer.b[j]; + if (j < layer.b.size() - 1) out << ","; + } + out << "\n"; + } + + // Write training history + out << "\n[TRAINING_HISTORY]\n"; + out << "numEpochs," << epochLosses_.size() << "\n"; + out << "trainingSampleSize," << trainingSampleSize_ << "\n"; + out << "EpochNumber,Loss\n"; + for (size_t i = 0; i < epochLosses_.size(); ++i) { + out << i << "," << epochLosses_[i] << "\n"; + } + } + + ScoreBasedDiffusionModel ScoreBasedDiffusionModel::loadModel( + CLHEP::RandFlat& randFlat, + CLHEP::RandGaussQ& randGaussQ, + const std::string& filename + ) + { + std::ifstream in(filename); + + if (!in) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") + << "Cannot open file " << filename; + } + + std::string line; + + // Temporary storage + int dim = 0, conditionDim = 0, hidden = 0, layers = 0; + OptimizerType optimizerType = OptimizerType::ADAM; + double adamBeta1 = 0.0, adamBeta2 = 0.0, adamEps = 0.0; + NoiseScheduleType scheduleType = NoiseScheduleType::COSINE; + double betaMin = 0.0, betaMax = 0.0, cosineOffset = 0.0; + int batchSize = 1, diffusionSteps = 1; + double gradientClipThreshold = 0.0, learningRate = 0.0; + + std::vector loadedNetwork; + std::vector epochLosses; + + // Helper lambda to split CSV + auto split = [](const std::string& s) { + std::vector tokens; + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, ',')) { + // trim whitespace from item (beginning and end, should not be present if csv is generated by code, but just in case) + item.erase(item.begin(), std::find_if(item.begin(), item.end(), [](unsigned char ch) { return !std::isspace(ch); })); + item.erase(std::find_if(item.rbegin(), item.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), item.end()); + tokens.push_back(item); + } + return tokens; + }; + // Helper lambda to extract layer index from strings like "Layer10_OutSize" + auto getLayerIdx = [](const std::string& s) { + size_t start = 5; // after "Layer" + size_t end = s.find('_', start); + if (end == std::string::npos) + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Malformed layer string: " << s; + return std::stoi(s.substr(start, end - start)); + }; + + // Parse file + std::string section; + + while (std::getline(in, line)) { + if (line.empty()) continue; + + // Detect section headers + if (line[0] == '[') { + section = line; + continue; + } + + // MODEL PARAMETERS + if (section == "[MODEL_PARAMETERS]") { + if (line == "Parameter,Value") continue; + + auto tokens = split(line); + if (tokens.size() != 2) throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "ScoreBasedDiffusionModel::loadModel: invalid line in [MODEL_PARAMETERS] section: " << line; + + const std::string& key = tokens[0]; + const std::string& val = tokens[1]; + + if (key == "dim") dim = std::stoi(val); + else if (key == "conditionDim") conditionDim = std::stoi(val); + else if (key == "hidden") hidden = std::stoi(val); + else if (key == "layers") layers = std::stoi(val); + else if (key == "optimizerType") + optimizerType = (val == "ADAM") ? OptimizerType::ADAM : OptimizerType::SGD; + else if (key == "adamBeta1") adamBeta1 = std::stod(val); + else if (key == "adamBeta2") adamBeta2 = std::stod(val); + else if (key == "adamEps") adamEps = std::stod(val); + else if (key == "noiseScheduleType") + scheduleType = (val == "COSINE") ? NoiseScheduleType::COSINE : NoiseScheduleType::LINEAR; + else if (key == "betaMin") betaMin = std::stod(val); + else if (key == "betaMax") betaMax = std::stod(val); + else if (key == "cosineOffset") cosineOffset = std::stod(val); + else if (key == "batchSize") batchSize = std::stoi(val); + else if (key == "gradientClipThreshold") gradientClipThreshold = std::stod(val); + else if (key == "learningRate") learningRate = std::stod(val); + else if (key == "diffusionSteps") diffusionSteps = std::stoi(val); + } + + // NETWORK PARAMETERS + else if (section == "[NETWORK_PARAMETERS]") { + + if (line.rfind("numLayers", 0) == 0) { + int numLayers = std::stoi(split(line)[1]); + loadedNetwork.resize(numLayers); + if (loadedNetwork.size() != (size_t)layers) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Layer count mismatch"; + } + continue; + } + + // Layer sizes + if (line.find("_OutSize") != std::string::npos) { + auto tokens = split(line); + int outSize = std::stoi(tokens[1]); + int layerIdx = getLayerIdx(tokens[0]); + loadedNetwork[layerIdx].W.resize(outSize); + loadedNetwork[layerIdx].b.resize(outSize); + } + else if (line.find("_InSize") != std::string::npos) { + auto tokens = split(line); + int inSize = std::stoi(tokens[1]); + int layerIdx = getLayerIdx(tokens[0]); + if (layerIdx == 0 && inSize != dim + conditionDim + 1) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") + << "Input layer input size mismatch (expected " + << (dim + conditionDim + 1) << ", got " << inSize << ")"; + } + if (loadedNetwork[layerIdx].W.empty()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "InSize encountered before OutSize for layer " << layerIdx; + } + for (auto& row : loadedNetwork[layerIdx].W) + row.resize(inSize); + } + // Weights + else if (line.find("_Weights") != std::string::npos) { + int layerIdx = getLayerIdx(line); + if (layerIdx < 0 || layerIdx >= (int)loadedNetwork.size()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Invalid layer index: " << layerIdx; + } + if (loadedNetwork[layerIdx].W.empty() || loadedNetwork[layerIdx].W[0].empty()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Weights encountered before layer size definition"; + } + for (auto& row : loadedNetwork[layerIdx].W) { + if (!std::getline(in, line)) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Unexpected EOF while reading weights"; + } + auto vals = split(line); + if (vals.size() != row.size()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") + << "Weight row size mismatch for layer " << layerIdx; + } + for (size_t j = 0; j < vals.size(); ++j) + row[j] = std::stod(vals[j]); + } + } + // Biases + else if (line.find("_Biases") != std::string::npos) { + int layerIdx = getLayerIdx(line); + if (layerIdx < 0 || layerIdx >= (int)loadedNetwork.size()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Invalid layer index: " << layerIdx; + } + if (!std::getline(in, line)) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Unexpected EOF while reading biases"; + } + auto vals = split(line); + if (vals.size() != loadedNetwork[layerIdx].b.size()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") + << "Bias size mismatch for layer " << layerIdx; + } + for (size_t j = 0; j < vals.size(); ++j) + loadedNetwork[layerIdx].b[j] = std::stod(vals[j]); + } + } + + // TRAINING HISTORY + else if (section == "[TRAINING_HISTORY]") { + if (line == "EpochNumber,Loss") continue; + + auto tokens = split(line); + if (tokens.size() == 2) { + if (tokens[0] == "numEpochs") { + int numEpochs = std::stoi(tokens[1]); + epochLosses.reserve(numEpochs); + } else if (tokens[0] == "trainingSampleSize") + { + // We can store this if needed for analysis, but it is not used in model reconstruction, so we will ignore it for now. + }else { + epochLosses.push_back(std::stod(tokens[1])); + } + } + } + } + + if (dim <= 0 || conditionDim < 0 || hidden <= 0 || layers <= 0) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Invalid model parameters in file"; + } + for (size_t l = 0; l < loadedNetwork.size(); ++l) { + if (loadedNetwork[l].W.empty() || loadedNetwork[l].b.empty()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Incomplete layer data in file"; + } + for (const auto& row : loadedNetwork[l].W) { + if (row.size() != loadedNetwork[l].W[0].size()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Inconsistent row size in layer"; + } + } + } + for (size_t l = 1; l < loadedNetwork.size(); ++l) { + if (loadedNetwork[l].W[0].size() != loadedNetwork[l-1].W.size()) { + throw cet::exception("ScoreBasedDiffusionModel::loadModel") << "Layer size mismatch between layers"; + } + } + + std::ostringstream logMsg; + logMsg << "Model parameters loaded successfully from " << filename << "\n" + << "Warning: optimizer state (e.g., Adam moments) is not saved/loaded.\n" + << "The loaded model is suitable for inference, or for retraining with a fresh optimizer state,\n" + << "but does NOT resume training from the original state."; + mf::LogInfo("ScoreBasedDiffusionModel::loadModel") << logMsg.str(); + + // Reconstruct model + ScoreBasedDiffusionModel model( + randFlat, + randGaussQ, + dim, + conditionDim, + hidden, + layers, + optimizerType, + adamBeta1, + adamBeta2, + adamEps, + scheduleType, + betaMin, + betaMax, + cosineOffset, + batchSize, + gradientClipThreshold, + learningRate, + diffusionSteps + ); + + // Overwrite initialized weights with loaded values + for (size_t l = 0; l < model.network_.size(); ++l) { + model.network_[l].W = loadedNetwork[l].W; + model.network_[l].b = loadedNetwork[l].b; + } + model.epochLosses_ = epochLosses; + + return model; + } + + std::vector ScoreBasedDiffusionModel::generateSample( + const std::vector& condition, + bool useHeun, + int diffusionSteps + ) + { + if (condition.size() != static_cast(conditionDim_)) { + throw cet::exception("ScoreBasedDiffusionModel::generateSample") + << "Conditioning dimension mismatch: got " << condition.size() + << ", expected " << conditionDim_; + } + + // Use provided diffusionSteps if positive, otherwise use the model's configured value + int steps = (diffusionSteps > 0) ? diffusionSteps : diffusionSteps_; + + // Start from pure noise + std::vector x(dim_); + + for (int i=0;i= 0; --step) { + + double t = (double)step/steps; + double dt = 1.0/steps; + double s = sigma(t); // as long as diffusionSteps_ is not too large, s should not become too small to cause numerical issues. + + if (!useHeun) { + // Euler method (1st order) + std::vector input = x; + input.insert(input.end(), condition.begin(), condition.end()); + input.push_back(t); + + auto score = forward(input); + + for (int i=0;i input = x; + input.insert(input.end(), condition.begin(), condition.end()); + input.push_back(t); + auto score_k1 = forward(input); + + std::vector k1(dim_); + for (int i=0;i x_pred(dim_); + for (int i=0;i input_next = x_pred; + input_next.insert(input_next.end(), condition.begin(), condition.end()); + input_next.push_back(t_next); + auto score_k2 = forward(input_next); + + std::vector k2(dim_); + for (int i=0;i +#include +#include +#include + +// art includes +#include "art/Framework/Core/EDAnalyzer.h" +#include "art/Framework/Principal/Event.h" +#include "art/Framework/Principal/Handle.h" +#include "art/Framework/Principal/Run.h" + +// exception handling +#include "cetlib_except/exception.h" + +// fhicl includes +#include "canvas/Utilities/InputTag.h" +#include "fhiclcpp/types/Atom.h" + +// message handling +#include "messagefacility/MessageLogger/MessageLogger.h" + +// Offline includes +#include "Offline/GlobalConstantsService/inc/GlobalConstantsHandle.hh" +#include "Offline/GlobalConstantsService/inc/ParticleDataList.hh" +#include "Offline/MCDataProducts/inc/SimParticle.hh" +#include "Offline/MCDataProducts/inc/StepPointMC.hh" + +// ROOT includes +#include "art_root_io/TFileService.h" +#include "TTree.h" + +typedef cet::map_vector_key key_type; +typedef unsigned long VolumeId_type; + +namespace mu2e { + class VDResamplerConfigure : public art::EDAnalyzer { + public: + using Name=fhicl::Name; + using Comment=fhicl::Comment; + struct Config { + fhicl::Atom StepPointMCsTag{Name("StepPointMCsTag"), Comment("Tag identifying the StepPointMCs")}; + fhicl::Atom SimParticlemvTag{Name("SimParticlemvTag"), Comment("Tag identifying the SimParticlemv")}; + fhicl::Atom VirtualDetectorID{Name("VirtualDetectorID"), Comment("ID of the virtual detector to train on"), 116}; + fhicl::Atom VDResamplerDir{Name("VDResamplerDir"), Comment("Directory to store the generated csv files")}; + fhicl::Atom fclDir{Name("fclDir"), Comment("Directory to store the generated fhicl files"), ""}; + fhicl::Atom dataSourceTag{Name("dataSourceTag"), Comment("A tag to distinguish different data sources, will be appended to the generated fcl and csv files")}; + fhicl::Atom trainingThreshold{Name("trainingThreshold"), Comment("Minimum number of hits for a particle type to be included in the training"), 100}; + fhicl::Atom doROOTDump{Name("doROOTDump"), Comment("Whether to dump the VD hit info into a ROOT file for debugging and analysis"), false}; + }; + using Parameters = art::EDAnalyzer::Table; + explicit VDResamplerConfigure(const Parameters& conf); + void analyze(const art::Event& e); + void endJob(); + private: + art::ProductToken StepPointMCsToken; + art::ProductToken SimParticlemvToken; + VolumeId_type VirtualDetectorID = 0; + std::string VDResamplerDir; + std::string fclDir; + std::string dataSourceTag; + int trainingThreshold; + bool doROOTDump; + GlobalConstantsHandle pdt; + int pdgId = 0; + double x = 0.0, y = 0.0, z = 0.0, px = 0.0, py = 0.0, pz = 0.0, mass = 0.0, E = 0.0, time = 0.0; + VolumeId_type virtualdetectorId = 0; + int nThrownEvents = 0; + TTree* ttree; + std::map pdgIds; // + }; + + VDResamplerConfigure::VDResamplerConfigure(const Parameters& conf) : + art::EDAnalyzer(conf), + StepPointMCsToken(consumes(conf().StepPointMCsTag())), + SimParticlemvToken(consumes(conf().SimParticlemvTag())), + VirtualDetectorID(conf().VirtualDetectorID()), + VDResamplerDir(conf().VDResamplerDir()), + fclDir(conf().fclDir()), + dataSourceTag(conf().dataSourceTag()), + trainingThreshold(conf().trainingThreshold()), + doROOTDump(conf().doROOTDump()) { + if (doROOTDump) { + art::ServiceHandle tfs; + ttree = tfs->make( "ttree", "Virtual Detectors Hit Summary"); + ttree->Branch("time", &time, "time/D"); // ns + ttree->Branch("virtualdetectorId", &virtualdetectorId, "virtualdetectorId/l"); + ttree->Branch("pdgId", &pdgId, "pdgId/I"); + ttree->Branch("x", &x, "x/D"); // mm + ttree->Branch("y", &y, "y/D"); // mm + ttree->Branch("z", &z, "z/D"); // mm + ttree->Branch("px", &px, "px/D"); // MeV + ttree->Branch("py", &py, "py/D"); // MeV + ttree->Branch("pz", &pz, "pz/D"); // MeV + ttree->Branch("E", &E, "E/D"); // MeV + } + }; + + void VDResamplerConfigure::analyze(const art::Event& event) { + // Get the data products from the event + auto const& StepPointMCs = event.getProduct(StepPointMCsToken); + if (StepPointMCs.empty()) + return; + auto const& SimParticles = event.getProduct(SimParticlemvToken); + if (SimParticles.empty()) + return; + + // Loop over all VD hits + for (const StepPointMC& step : StepPointMCs) { + // Get the associated particle + const SimParticle& particle = SimParticles.at(step.trackId()); + pdgId = particle.pdgId(); + + virtualdetectorId = step.virtualDetectorId(); + pz = step.momentum().z(); + if (virtualdetectorId != VirtualDetectorID || pz <= 0) + { + // mf::LogWarning("VDResamplerConfigure") << "Thrown event\n" + // << "PDG ID = " << pdgId << ", VDID = " << virtualdetectorId << ", z = " << step.position().z() << ", pz = " << pz; + nThrownEvents += 1; + continue; // Filter hits based on the virtual detector ID and pz + } + + if (doROOTDump) { + x = step.position().x(); + y = step.position().y(); + z = step.position().z(); + px = step.momentum().x(); + py = step.momentum().y(); + pz = step.momentum().z(); + mass = pdt->particle(pdgId).mass(); + E = std::sqrt(step.momentum().mag2()+mass*mass)-mass; // Subtract the rest mass + if (E < 0) + throw cet::exception("LogicError", "Energy is negative"); + + ttree->Fill(); + } + + // Count the number of hits for each particle type for the summary + if (pdgIds.find(pdgId) != pdgIds.end()) + pdgIds[pdgId] += 1; + else + pdgIds.emplace(std::make_pair(pdgId, 1)); + }; + return; + }; + + void VDResamplerConfigure::endJob() { + mf::LogInfo log("Virtual Detector Resampler Training Configuration Summary"); + log << "========= Particle Summary =========\n"; + for (auto part : pdgIds) + log << "PDGID " << part.first << ": " << part.second << "\n"; + log << "====================================\n"; + log << "Number of thrown events: " << nThrownEvents << "\n"; + + // store a summary of the number of hits for each particle type in a csv file for reference, + // all particles are included, but the particle types with hits below the training threshold are + // marked with an asterisk to indicate that they will not be included in the training. + std::string summaryFile = VDResamplerDir + "/VDResamplerConfigure_" + dataSourceTag + "_HitSummary.csv"; + std::string fclFilePath = fclDir.empty() ? VDResamplerDir : fclDir; + std::string fclFile = fclFilePath + "/VDResamplerTrain_" + dataSourceTag + ".fcl"; + std::ofstream sumOutFile(summaryFile); + std::ofstream fclOutFile(fclFile); + if (!sumOutFile) { + throw cet::exception("VDResamplerConfigure::endJob") << "Cannot open file " << summaryFile; + } + if (!fclOutFile) { + throw cet::exception("VDResamplerConfigure::endJob") << "Cannot open file " << fclFile; + } + + std::string trainingPathNames = ""; + std::vector> trainingPaths; // pairs of (moduleName, pathName) + + sumOutFile << "PDGID,HitCount,TrainingMode\n"; + + fclOutFile << "# This fcl is generated by VDResamplerConfigure_module.cc\n"; + fclOutFile << "# Training configuration for the VD resampler is generated for each particle type\n\n"; + fclOutFile << "#include \"Offline/fcl/standardServices.fcl\"\n"; + fclOutFile << "#include \"Production/JobConfig/pileup/STM/prolog.fcl\"\n\n"; + fclOutFile << "process_name: VDResamplerTrain\n\n"; + fclOutFile << "source : @local::STMPileup.VDResamplerSource\n\n"; + fclOutFile << "services : @local::Services.Sim\n\n"; + fclOutFile << "physics: {\n analyzers : {\n"; + + for (const auto& part : pdgIds) { + sumOutFile << part.first << "," << part.second; + bool useTwoStageTraining = false; + if (part.second < 100000) { + // if data is limited, use two-stage training to improve performance and reduce the required training data size for each model. + useTwoStageTraining = true; + } + if (part.second < trainingThreshold) { + sumOutFile << ",*\n"; + mf::LogWarning("VDResamplerConfigure::endJob") << "Particle type with PDGID " << part.first + << " has only " << part.second << " hits, which is below the training threshold of " + << trainingThreshold << ". This particle type will NOT be included in the training."; + } else { + // mark how many models will be trained for this particle type (1 for all-at-once, 2 for two-stage) + sumOutFile << "," << (useTwoStageTraining ? "2" : "1") << "\n"; + + // Generate the fcl file for training for this particle type + // if pdgID is negative, we will use "m" instead of "-" in the filename to avoid issues with file naming + std::string pdgIdstr = (part.first < 0) ? "m" + std::to_string(-part.first) : std::to_string(part.first); + std::string moduleName = "VDResamplerTrainVD"+ std::to_string(VirtualDetectorID) + dataSourceTag + "pdg" + pdgIdstr; + std::string pathName = "trainPathVD" + std::to_string(VirtualDetectorID) + "pdg" + pdgIdstr; + trainingPaths.push_back({moduleName, pathName}); + fclOutFile << " " << moduleName << " : {\n" + << " module_type : VDResamplerTrain\n" + << " StepPointMCsTag : @local::SimplifyStage1Data.StepPointMCsTag\n" + << " SimParticlemvTag : @local::SimplifyStage1Data.SimParticlemvTag\n" + << " SBDMuseTwoStageTraining : " << (useTwoStageTraining ? "true" : "false") << "\n"; + if (useTwoStageTraining) { + fclOutFile << " SBDMstage1ModelFile : \"" << VDResamplerDir << "/SBDMmodel_stage1_VD" << VirtualDetectorID << "_pdg" << pdgIdstr << ".csv\"\n" + << " SBDMstage2ModelFile : \"" << VDResamplerDir << "/SBDMmodel_stage2_VD" << VirtualDetectorID << "_pdg" << pdgIdstr << ".csv\"\n"; + } else { + fclOutFile << " SBDMallAtOnceModelFile : \"" << VDResamplerDir << "/SBDMmodel_allAtOnce_VD" << VirtualDetectorID << "_pdg" << pdgIdstr << ".csv\"\n"; + } + fclOutFile << " VirtualDetectorID : " << VirtualDetectorID << "\n" + // << " VDz0 : " << std::to_string() << "\n" // include if not VD116 + // << " VDr : " << std::to_string() << "\n" // include if not VD116 + << " pdgID : " << part.first << "\n" + // << " SBDMhidden : 128\n" + // << " SBDMlayers : 4\n" + // << " SBDMoptimizer : \"ADAM\"\n" + // << " SBDMadamBeta1 : 0.9\n" + // << " SBDMadamBeta2 : 0.999\n" + // << " SBDMadamEps : 1e-8\n" + // << " SBDMnoiseSchedule : \"COSINE\"\n" + // << " SBDMbetaMin : 1e-4\n" + // << " SBDMbetaMax : 0.02\n" + // << " SBDMcosineOffset : 0.008\n" + // << " SBDMbatchSize : 32\n" + // << " SBDMgradientClip : 1.0\n" + // << " SBDMlearningRate : 1e-3\n" + // << " SBDMdiffusionSteps : 200\n" + << " SBDMtrainingSize : " << part.second << "\n" + // << " SBDMtrainingEpochs : 10\n" + << " }\n"; + } + } + fclOutFile << " }\n"; + + // Create trigger paths for each training module + for (size_t i = 0; i < trainingPaths.size(); ++i) { + fclOutFile << " " << trainingPaths[i].second << " : [" << trainingPaths[i].first << "]\n"; + if (i > 0) trainingPathNames += ", "; + trainingPathNames += trainingPaths[i].second; + } + + fclOutFile << " end_paths: [" + trainingPathNames + "]\n"; + fclOutFile << "}\n\n"; + if (doROOTDump) fclOutFile << "services.TFileService.fileName : @nil\n"; + + return; + }; +}; // end namespace mu2e + +DEFINE_ART_MODULE(mu2e::VDResamplerConfigure) diff --git a/STMMC/src/VDResamplerGenerateFromModel_module.cc b/STMMC/src/VDResamplerGenerateFromModel_module.cc new file mode 100644 index 0000000000..9f72ccbea0 --- /dev/null +++ b/STMMC/src/VDResamplerGenerateFromModel_module.cc @@ -0,0 +1,342 @@ +// VDResamplerGenerateFromModel_module.cc +// This module takes either a single all-at-once model file or a pair of stage-1/stage-2 +// model files and produces new virtual-detector-like GenParticles from the trained +// score-based diffusion model(s). +// Yongyi Wu, Mar. 2026 + +// stdlib includes +#include +#include +#include +#include +#include +#include +#include + +#include "Offline/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh" + +// art includes +#include "art/Framework/Core/EDProducer.h" +#include "art/Framework/Principal/Event.h" +#include "art/Framework/Services/Optional/RandomNumberGenerator.h" +#include "art/Framework/Services/Registry/ServiceHandle.h" + +// CLHEP includes +#include "CLHEP/Vector/LorentzVector.h" +#include "CLHEP/Vector/ThreeVector.h" +#include "CLHEP/Random/RandFlat.h" +#include "CLHEP/Random/RandGaussQ.h" + +// exception handling +#include "cetlib_except/exception.h" + +// fhicl includes +#include "fhiclcpp/types/Atom.h" + +// message handling +#include "messagefacility/MessageLogger/MessageLogger.h" + +// Offline includes +#include "Offline/DataProducts/inc/PDGCode.hh" +#include "Offline/GlobalConstantsService/inc/GlobalConstantsHandle.hh" +#include "Offline/GlobalConstantsService/inc/ParticleDataList.hh" +#include "Offline/MCDataProducts/inc/GenId.hh" +#include "Offline/MCDataProducts/inc/GenParticle.hh" +#include "Offline/SeedService/inc/SeedService.hh" +#include "Offline/STMMC/inc/VDResamplerTransformDefaults.hh" + +// ROOT includes +#include "art_root_io/TFileService.h" +#include "TTree.h" + +namespace mu2e { + + namespace { + // Utility function to extract PDG ID from the model filename, which is expected to contain a substring like "pdg[optional m][digits]", e.g. "pdg13" for muons, "pdgm13" for muon pluses. + int loadPDGIdFromFileName(const std::string& fileName) { + const size_t pdgPos = fileName.find("pdg"); + if (pdgPos == std::string::npos) { + throw cet::exception("VDResamplerGenerateFromModel") + << "Cannot infer PDG ID from model filename: " << fileName; + } + size_t pos = pdgPos + 3; + bool negative = false; + if (pos < fileName.size() && (fileName[pos] == 'm' || fileName[pos] == '-')) { + negative = true; + ++pos; + } + const size_t startDigits = pos; + while (pos < fileName.size() && std::isdigit(static_cast(fileName[pos]))) { + ++pos; + } + if (startDigits == pos) { + throw cet::exception("VDResamplerGenerateFromModel") + << "Cannot infer PDG ID from model filename: " << fileName; + } + const int magnitude = std::stoi(fileName.substr(startDigits, pos - startDigits)); + return negative ? -magnitude : magnitude; + } + } + + class VDResamplerGenerateFromModel : public art::EDProducer { + public: + using Name = fhicl::Name; + using Comment = fhicl::Comment; + + struct Config { + fhicl::Atom useTwoStageModel{ + Name("useTwoStageModel"), + Comment("If true, use stage-1/stage-2 models. If false, use the all-at-once 6D model."), + true + }; + fhicl::Atom stage1ModelFile{ + Name("stage1ModelFile"), + Comment("CSV filename for the stage-1 model parameters"), + "" + }; + fhicl::Atom stage2ModelFile{ + Name("stage2ModelFile"), + Comment("CSV filename for the stage-2 model parameters"), + "" + }; + fhicl::Atom allAtOnceModelFile{ + Name("allAtOnceModelFile"), + Comment("CSV filename for the all-at-once 6D model parameters"), + "" + }; + fhicl::Atom useHeun{ + Name("useHeun"), + Comment("If true, use Heun's method for reverse diffusion. Otherwise use Euler."), + true + }; + fhicl::Atom diffusionSteps{ + Name("diffusionSteps"), + Comment("Number of reverse-diffusion steps used for sampling"), + 200 + }; + fhicl::Atom VDz0{ + Name("VDz0"), + Comment("Nominal z coordinate of the virtual detector"), + 37700.39 + }; + fhicl::Atom VDr{ + Name("VDr"), + Comment("Virtual detector radius used in the training transform"), + 2000.0 + }; + fhicl::Atom doROOTDump{ + Name("doROOTDump"), + Comment("Whether to dump generated samples into a ROOT tree"), + false + }; + }; + + using Parameters = art::EDProducer::Table; + + explicit VDResamplerGenerateFromModel(const Parameters& conf); + void produce(art::Event& event) override; + + private: + art::RandomNumberGenerator::base_engine_t& engine_; + CLHEP::RandFlat randFlat_; + CLHEP::RandGaussQ randGaussQ_; + bool useTwoStageModel_; + std::string stage1ModelFile_; + std::string stage2ModelFile_; + std::string allAtOnceModelFile_; + bool useHeun_; + int diffusionSteps_; + double VDz0_; + double VDr_; + bool doROOTDump_; + GlobalConstantsHandle pdt_; + + std::unique_ptr allAtOnceModel_; + std::unique_ptr stage1Model_; + std::unique_ptr stage2Model_; + + int pdgId_ = 0; + + // Detector-center parameters used in the same transform as training. + double x0_ = vdresampler::kX0; + double y0_ = vdresampler::kY0; + double t0_ = vdresampler::kT0; + double tScale_ = vdresampler::kTScale; + double p0_ = vdresampler::kP0; + + // Variables for optional ROOT dump. + TTree* outTree_ = nullptr; + double x_gen_ = 0.0; + double y_gen_ = 0.0; + double z_gen_ = 0.0; + double t_gen_ = 0.0; + double px_gen_ = 0.0; + double py_gen_ = 0.0; + double pz_gen_ = 0.0; + double mass_gen_ = 0.0; + double E_gen_ = 0.0; + }; + + VDResamplerGenerateFromModel::VDResamplerGenerateFromModel(const Parameters& conf) + : art::EDProducer{conf}, + engine_(createEngine(art::ServiceHandle()->getSeed())), + randFlat_(engine_), + randGaussQ_(engine_), + useTwoStageModel_(conf().useTwoStageModel()), + stage1ModelFile_(conf().stage1ModelFile()), + stage2ModelFile_(conf().stage2ModelFile()), + allAtOnceModelFile_(conf().allAtOnceModelFile()), + useHeun_(conf().useHeun()), + diffusionSteps_(conf().diffusionSteps()), + VDz0_(conf().VDz0()), + VDr_(conf().VDr()), + doROOTDump_(conf().doROOTDump()) { + + produces(); + + if (useTwoStageModel_) { + if (stage1ModelFile_.empty() || stage2ModelFile_.empty()) { + throw cet::exception("VDResamplerGenerateFromModel") + << "Two-stage generation requires both stage1ModelFile and stage2ModelFile."; + } + + stage1Model_ = std::make_unique( + ScoreBasedDiffusionModel::loadModel(randFlat_, randGaussQ_, stage1ModelFile_) + ); + stage2Model_ = std::make_unique( + ScoreBasedDiffusionModel::loadModel(randFlat_, randGaussQ_, stage2ModelFile_) + ); + pdgId_ = loadPDGIdFromFileName(stage1ModelFile_); + } else { + if (allAtOnceModelFile_.empty()) { + throw cet::exception("VDResamplerGenerateFromModel") + << "All-at-once generation requires allAtOnceModelFile."; + } + + allAtOnceModel_ = std::make_unique( + ScoreBasedDiffusionModel::loadModel(randFlat_, randGaussQ_, allAtOnceModelFile_) + ); + pdgId_ = loadPDGIdFromFileName(allAtOnceModelFile_); + } + + z_gen_ = VDz0_; + + if (doROOTDump_) { + art::ServiceHandle tfs; + outTree_ = tfs->make("ttree", "Generated samples from VD resampler model"); + outTree_->Branch("pdgId", &pdgId_, "pdgId/I"); + outTree_->Branch("x", &x_gen_, "x/D"); + outTree_->Branch("y", &y_gen_, "y/D"); + outTree_->Branch("z", &z_gen_, "z/D"); + outTree_->Branch("time", &t_gen_, "time/D"); + outTree_->Branch("px", &px_gen_, "px/D"); + outTree_->Branch("py", &py_gen_, "py/D"); + outTree_->Branch("pz", &pz_gen_, "pz/D"); + outTree_->Branch("E", &E_gen_, "E/D"); + } + } + + void VDResamplerGenerateFromModel::produce(art::Event& event) { + auto output = std::make_unique(); + + double x_trans = 0.0; + double y_trans = 0.0; + double t_trans = 0.0; + double pr_t = 0.0; + double pphi_t = 0.0; + double pz_t = 0.0; + + // Generate a new sample using the loaded model(s) + // note the values here are transformed and need to be inverted back to the original coordinates after sampling. + if (useTwoStageModel_) { + const std::vector stage1Sample = stage1Model_->generateSample({}, useHeun_, diffusionSteps_); + if (stage1Sample.size() != 3u) { + throw cet::exception("VDResamplerGenerateFromModel") + << "Stage-1 model returned " << stage1Sample.size() << " values, expected 3."; + } + + t_trans = stage1Sample[0]; + x_trans = stage1Sample[1]; + y_trans = stage1Sample[2]; + + const std::vector stage2Condition = {t_trans, x_trans, y_trans}; + const std::vector stage2Sample = stage2Model_->generateSample(stage2Condition, useHeun_, diffusionSteps_); + if (stage2Sample.size() != 3u) { + throw cet::exception("VDResamplerGenerateFromModel") + << "Stage-2 model returned " << stage2Sample.size() << " values, expected 3."; + } + + pr_t = stage2Sample[0]; + pphi_t = stage2Sample[1]; + pz_t = stage2Sample[2]; + } else { + const std::vector sample = allAtOnceModel_->generateSample({}, useHeun_, diffusionSteps_); + if (sample.size() != 6u) { + throw cet::exception("VDResamplerGenerateFromModel") + << "All-at-once model returned " << sample.size() << " values, expected 6."; + } + + t_trans = sample[0]; + x_trans = sample[1]; + y_trans = sample[2]; + pr_t = sample[3]; + pphi_t = sample[4]; + pz_t = sample[5]; + } + + // Recover detector coordinates from the transformed (x', y'). + const double u = std::sqrt(x_trans * x_trans + y_trans * y_trans); + const double theta = std::atan2(y_trans, x_trans); + const double rho = std::tanh(u); + const double r = rho * VDr_; + const double dx = r * std::cos(theta); + const double dy = r * std::sin(theta); + x_gen_ = dx + x0_; + y_gen_ = dy + y0_; + z_gen_ = VDz0_; + + // Invert the time transform. + t_gen_ = t0_ * std::exp(t_trans * tScale_); + + // Invert the momentum transform. + const double pr = p0_ * std::sinh(pr_t); + const double pphi = p0_ * std::sinh(pphi_t); + pz_gen_ = p0_ * std::sinh(pz_t); + + if (r > 1e-6) { + const double rx = dx / r; + const double ry = dy / r; + const double phix = -ry; + const double phiy = rx; + px_gen_ = pr * rx + pphi * phix; + py_gen_ = pr * ry + pphi * phiy; + } else { + px_gen_ = pr; + py_gen_ = pphi; + } + + mass_gen_ = pdt_->particle(pdgId_).mass(); + const CLHEP::Hep3Vector momParticle(px_gen_, py_gen_, pz_gen_); + const CLHEP::Hep3Vector posParticle(x_gen_, y_gen_, z_gen_); + const double eTotal = std::sqrt(momParticle.mag2() + mass_gen_ * mass_gen_); + E_gen_ = eTotal - mass_gen_; + const CLHEP::HepLorentzVector fourMomParticle(eTotal, momParticle); + + output->emplace_back( + PDGCode::type(pdgId_), + GenId::STMDownStreamGenTool, + posParticle, + fourMomParticle, + t_gen_ + ); + + event.put(std::move(output)); + + if (doROOTDump_) { + outTree_->Fill(); + } + } + +} // namespace mu2e + +DEFINE_ART_MODULE(mu2e::VDResamplerGenerateFromModel) diff --git a/STMMC/src/VDResamplerGenerateMix_module.cc b/STMMC/src/VDResamplerGenerateMix_module.cc new file mode 100644 index 0000000000..a0c477b375 --- /dev/null +++ b/STMMC/src/VDResamplerGenerateMix_module.cc @@ -0,0 +1,500 @@ +// VDResamplerGenerateMix_module.cc +// This module takes a list of VDResamplerConfigure_[dataSourceTag]_HitSummary.csv files +// generated by VDResamplerConfigure_module.cc, plus a matching list of POT counts. +// It chooses which source summary to use according to the trained hit yield per POT, +// then chooses the particle type according to the per-summary particle ratios, loads +// either a one-stage or two-stage model for that particle, and generates a GenParticle. +// Yongyi Wu, Mar. 2026 + +// stdlib includes +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Offline/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh" + +// art includes +#include "art/Framework/Core/EDProducer.h" +#include "art/Framework/Principal/Event.h" +#include "art/Framework/Services/Optional/RandomNumberGenerator.h" +#include "art/Framework/Services/Registry/ServiceHandle.h" + +// CLHEP includes +#include "CLHEP/Random/RandFlat.h" +#include "CLHEP/Random/RandGaussQ.h" +#include "CLHEP/Vector/LorentzVector.h" +#include "CLHEP/Vector/ThreeVector.h" + +// exception handling +#include "cetlib_except/exception.h" + +// fhicl includes +#include "fhiclcpp/types/Atom.h" +#include "fhiclcpp/types/Sequence.h" + +// message handling +#include "messagefacility/MessageLogger/MessageLogger.h" + +// Offline includes +#include "Offline/DataProducts/inc/PDGCode.hh" +#include "Offline/GlobalConstantsService/inc/GlobalConstantsHandle.hh" +#include "Offline/GlobalConstantsService/inc/ParticleDataList.hh" +#include "Offline/MCDataProducts/inc/GenId.hh" +#include "Offline/MCDataProducts/inc/GenParticle.hh" +#include "Offline/SeedService/inc/SeedService.hh" +#include "Offline/STMMC/inc/VDResamplerTransformDefaults.hh" + +// ROOT includes +#include "art_root_io/TFileService.h" +#include "TTree.h" + +namespace mu2e { + + namespace { + + // Utility function to trim whitespace from a string + std::string trim(const std::string& s) { + const auto begin = std::find_if_not(s.begin(), s.end(), [](unsigned char ch) { return std::isspace(ch); }); + if (begin == s.end()) { + return ""; + } + const auto end = std::find_if_not(s.rbegin(), s.rend(), [](unsigned char ch) { return std::isspace(ch); }).base(); + return std::string(begin, end); + } + + // Utility function to split a CSV line into fields, trimming whitespace + std::vector splitCSVLine(const std::string& line) { + std::vector fields; + std::stringstream ss(line); + std::string item; + while (std::getline(ss, item, ',')) { + fields.push_back(trim(item)); + } + return fields; + } + + std::string pdgIdToFileToken(const int pdgId) { + return (pdgId < 0) ? ("m" + std::to_string(-pdgId)) : std::to_string(pdgId); + } + } + + class VDResamplerGenerateMix : public art::EDProducer { + public: + using Name = fhicl::Name; + using Comment = fhicl::Comment; + + struct Config { + fhicl::Sequence hitSummaryFiles{ + Name("hitSummaryFiles"), + Comment("List of VDResamplerConfigure_[dataSourceTag]_HitSummary.csv files") + }; + fhicl::Sequence potsPerFile{ + Name("potsPerFile"), + Comment("List of POT counts corresponding to hitSummaryFiles") + }; + fhicl::Atom ModelFileDir{ + Name("ModelFileDir"), + Comment("Directory where the trained SBDM model files are stored") + }; + fhicl::Atom VirtualDetectorID{ + Name("VirtualDetectorID"), + Comment("Virtual detector ID encoded in the model filenames"), + 116 + }; + fhicl::Atom useHeun{ + Name("useHeun"), + Comment("Whether to use Heun's method for reverse diffusion"), + true + }; + fhicl::Atom diffusionSteps{ + Name("diffusionSteps"), + Comment("Number of diffusion steps for sampling"), + 200 + }; + fhicl::Atom VDz0{ + Name("VDz0"), + Comment("Nominal z coordinate of the virtual detector"), + 37700.39 + }; + fhicl::Atom VDr{ + Name("VDr"), + Comment("Virtual detector radius used in the training transform"), + 2000.0 + }; + fhicl::Atom doROOTDump{ + Name("doROOTDump"), + Comment("Whether to dump generated samples into a ROOT tree"), + false + }; + }; + + using Parameters = art::EDProducer::Table; + + explicit VDResamplerGenerateMix(const Parameters& conf); + void produce(art::Event& event) override; + + private: + struct ParticleModelSet { + int pdgId = 0; + int hitCount = 0; + bool useTwoStageModel = false; + std::unique_ptr allAtOnceModel; + std::unique_ptr stage1Model; + std::unique_ptr stage2Model; + }; + + struct SourceSummary { + std::string hitSummaryFile; + double pots = 0.0; + double sourceWeight = 0.0; + int trainedHitCount = 0; + std::vector particles; + }; + + art::RandomNumberGenerator::base_engine_t& engine_; + CLHEP::RandFlat randFlat_; + CLHEP::RandGaussQ randGaussQ_; + std::string modelFileDir_; + int virtualDetectorID_ = 0; + bool useHeun_ = true; + int diffusionSteps_ = 0; + double VDz0_ = 0.0; + double VDr_ = 0.0; + bool doROOTDump_ = false; + GlobalConstantsHandle pdt_; + + std::vector sources_; + double totalSourceWeight_ = 0.0; + + double x0_ = vdresampler::kX0; + double y0_ = vdresampler::kY0; + double t0_ = vdresampler::kT0; + double tScale_ = vdresampler::kTScale; + double p0_ = vdresampler::kP0; + + TTree* outTree_ = nullptr; + int summaryIndex_ = -1; + int pdgId_ = 0; + int generationMode_ = 0; // 1 = all-at-once, 2 = two-stage + double x_gen_ = 0.0; + double y_gen_ = 0.0; + double z_gen_ = 0.0; + double t_gen_ = 0.0; + double px_gen_ = 0.0; + double py_gen_ = 0.0; + double pz_gen_ = 0.0; + double mass_gen_ = 0.0; + double E_gen_ = 0.0; + + SourceSummary loadSourceSummary(const std::string& summaryFile, double pots); + void generateFromModels( + const ParticleModelSet& modelSet, + double& xTrans, + double& yTrans, + double& tTrans, + double& prTrans, + double& pphiTrans, + double& pzTrans + ) const; + }; + + VDResamplerGenerateMix::VDResamplerGenerateMix(const Parameters& conf) + : art::EDProducer{conf}, + engine_(createEngine(art::ServiceHandle()->getSeed())), + randFlat_(engine_), + randGaussQ_(engine_), + modelFileDir_(conf().ModelFileDir()), + virtualDetectorID_(conf().VirtualDetectorID()), + useHeun_(conf().useHeun()), + diffusionSteps_(conf().diffusionSteps()), + VDz0_(conf().VDz0()), + VDr_(conf().VDr()), + doROOTDump_(conf().doROOTDump()) { + + produces(); + + const auto& hitSummaryFiles = conf().hitSummaryFiles(); + const auto& potsPerFile = conf().potsPerFile(); + if (hitSummaryFiles.size() != potsPerFile.size()) { + throw cet::exception("VDResamplerGenerateMix") + << "hitSummaryFiles and potsPerFile must have the same length."; + } + if (hitSummaryFiles.empty()) { + throw cet::exception("VDResamplerGenerateMix") + << "At least one hit summary file must be provided."; + } + + double referencePots = 0.0; + for (size_t i = 0; i < hitSummaryFiles.size(); ++i) { + sources_.push_back(loadSourceSummary(hitSummaryFiles[i], potsPerFile[i])); + referencePots = std::max(referencePots, sources_.back().pots); + } + + for (auto& source : sources_) { + // Use a common POT reference scale so we preserve the desired + // proportionality trainedHitCount / pots without ever storing tiny + // per-source weights like O(1e-11). + source.sourceWeight = static_cast(source.trainedHitCount) * (referencePots / source.pots); + totalSourceWeight_ += source.sourceWeight; + } + + if (totalSourceWeight_ <= 0.0) { + throw cet::exception("VDResamplerGenerateMix") + << "No trained particles with positive weight were found across the provided summaries."; + } + + if (doROOTDump_) { + art::ServiceHandle tfs; + outTree_ = tfs->make("ttree", "Generated samples from mixed VD resampler models"); + outTree_->Branch("summaryIndex", &summaryIndex_, "summaryIndex/I"); + outTree_->Branch("pdgId", &pdgId_, "pdgId/I"); + outTree_->Branch("generationMode", &generationMode_, "generationMode/I"); + outTree_->Branch("x", &x_gen_, "x/D"); + outTree_->Branch("y", &y_gen_, "y/D"); + outTree_->Branch("z", &z_gen_, "z/D"); + outTree_->Branch("time", &t_gen_, "time/D"); + outTree_->Branch("px", &px_gen_, "px/D"); + outTree_->Branch("py", &py_gen_, "py/D"); + outTree_->Branch("pz", &pz_gen_, "pz/D"); + outTree_->Branch("E", &E_gen_, "E/D"); + } + } + + VDResamplerGenerateMix::SourceSummary VDResamplerGenerateMix::loadSourceSummary( + const std::string& summaryFile, + const double pots + ) { + if (pots <= 0.0) { + throw cet::exception("VDResamplerGenerateMix") + << "potsPerFile must be positive for " << summaryFile; + } + + std::ifstream in(summaryFile); + if (!in) { + throw cet::exception("VDResamplerGenerateMix") + << "Cannot open hit summary file " << summaryFile; + } + + SourceSummary source; + source.hitSummaryFile = summaryFile; + source.pots = pots; + + std::string line; + bool firstLine = true; + while (std::getline(in, line)) { + line = trim(line); + if (line.empty()) { + continue; + } + if (firstLine) { + firstLine = false; + continue; // header + } + + const std::vector fields = splitCSVLine(line); + if (fields.size() < 3) { + throw cet::exception("VDResamplerGenerateMix") + << "Malformed summary line in " << summaryFile << ": " << line; + } + + const int pdgId = std::stoi(fields[0]); + const int hitCount = std::stoi(fields[1]); + const std::string modeToken = fields[2]; + if (modeToken == "*") { + continue; + } + + ParticleModelSet particle; + particle.pdgId = pdgId; + particle.hitCount = hitCount; + particle.useTwoStageModel = (modeToken == "2"); + if (modeToken != "1" && modeToken != "2") { + throw cet::exception("VDResamplerGenerateMix") + << "Unexpected model mode token '" << modeToken << "' in " << summaryFile; + } + + const std::string pdgToken = pdgIdToFileToken(pdgId); + if (particle.useTwoStageModel) { + const std::string stage1ModelFile = + modelFileDir_ + "/SBDMmodel_stage1_VD" + std::to_string(virtualDetectorID_) + "_pdg" + pdgToken + ".csv"; + const std::string stage2ModelFile = + modelFileDir_ + "/SBDMmodel_stage2_VD" + std::to_string(virtualDetectorID_) + "_pdg" + pdgToken + ".csv"; + + particle.stage1Model = std::make_unique( + ScoreBasedDiffusionModel::loadModel(randFlat_, randGaussQ_, stage1ModelFile) + ); + particle.stage2Model = std::make_unique( + ScoreBasedDiffusionModel::loadModel(randFlat_, randGaussQ_, stage2ModelFile) + ); + } else { + const std::string modelFile = + modelFileDir_ + "/SBDMmodel_allAtOnce_VD" + std::to_string(virtualDetectorID_) + "_pdg" + pdgToken + ".csv"; + particle.allAtOnceModel = std::make_unique( + ScoreBasedDiffusionModel::loadModel(randFlat_, randGaussQ_, modelFile) + ); + } + + source.trainedHitCount += hitCount; + source.particles.push_back(std::move(particle)); + } + + if (source.particles.empty()) { + throw cet::exception("VDResamplerGenerateMix") + << "Summary file " << summaryFile << " contains no trained particle entries."; + } + + source.sourceWeight = 0.0; + return source; + } + + void VDResamplerGenerateMix::generateFromModels( + const ParticleModelSet& modelSet, + double& xTrans, + double& yTrans, + double& tTrans, + double& prTrans, + double& pphiTrans, + double& pzTrans + ) const { + if (modelSet.useTwoStageModel) { + const std::vector stage1Sample = modelSet.stage1Model->generateSample({}, useHeun_, diffusionSteps_); + if (stage1Sample.size() != 3u) { + throw cet::exception("VDResamplerGenerateMix") + << "Stage-1 model returned " << stage1Sample.size() << " values, expected 3."; + } + + tTrans = stage1Sample[0]; + xTrans = stage1Sample[1]; + yTrans = stage1Sample[2]; + + const std::vector condition = {tTrans, xTrans, yTrans}; + const std::vector stage2Sample = modelSet.stage2Model->generateSample(condition, useHeun_, diffusionSteps_); + if (stage2Sample.size() != 3u) { + throw cet::exception("VDResamplerGenerateMix") + << "Stage-2 model returned " << stage2Sample.size() << " values, expected 3."; + } + + prTrans = stage2Sample[0]; + pphiTrans = stage2Sample[1]; + pzTrans = stage2Sample[2]; + } else { + const std::vector sample = modelSet.allAtOnceModel->generateSample({}, useHeun_, diffusionSteps_); + if (sample.size() != 6u) { + throw cet::exception("VDResamplerGenerateMix") + << "All-at-once model returned " << sample.size() << " values, expected 6."; + } + + tTrans = sample[0]; + xTrans = sample[1]; + yTrans = sample[2]; + prTrans = sample[3]; + pphiTrans = sample[4]; + pzTrans = sample[5]; + } + } + + void VDResamplerGenerateMix::produce(art::Event& event) { + auto output = std::make_unique(); + + // Choose which source summary to use according to trained hit yield per POT. + double draw = randFlat_.fire() * totalSourceWeight_; + size_t chosenSourceIndex = 0; + for (; chosenSourceIndex + 1 < sources_.size(); ++chosenSourceIndex) { + draw -= sources_[chosenSourceIndex].sourceWeight; + if (draw < 0.0) { + break; + } + } + const SourceSummary& source = sources_[chosenSourceIndex]; + + // Choose particle type according to the hit ratios within the selected source. + double totalParticleWeight = 0.0; + for (const auto& particle : source.particles) { + totalParticleWeight += particle.hitCount; + } + if (totalParticleWeight <= 0.0) { + throw cet::exception("VDResamplerGenerateMix") + << "Selected source " << source.hitSummaryFile << " has non-positive particle weight."; + } + + draw = randFlat_.fire() * totalParticleWeight; + size_t chosenParticleIndex = 0; + for (; chosenParticleIndex + 1 < source.particles.size(); ++chosenParticleIndex) { + draw -= source.particles[chosenParticleIndex].hitCount; + if (draw < 0.0) { + break; + } + } + const ParticleModelSet& modelSet = source.particles[chosenParticleIndex]; + + double xTrans = 0.0; + double yTrans = 0.0; + double tTrans = 0.0; + double prTrans = 0.0; + double pphiTrans = 0.0; + double pzTrans = 0.0; + generateFromModels(modelSet, xTrans, yTrans, tTrans, prTrans, pphiTrans, pzTrans); + + const double u = std::sqrt(xTrans * xTrans + yTrans * yTrans); + const double theta = std::atan2(yTrans, xTrans); + const double rho = std::tanh(u); + const double r = rho * VDr_; + const double dx = r * std::cos(theta); + const double dy = r * std::sin(theta); + x_gen_ = dx + x0_; + y_gen_ = dy + y0_; + z_gen_ = VDz0_; + + t_gen_ = t0_ * std::exp(tTrans * tScale_); + + const double pr = p0_ * std::sinh(prTrans); + const double pphi = p0_ * std::sinh(pphiTrans); + pz_gen_ = p0_ * std::sinh(pzTrans); + + if (r > 1e-6) { + const double rx = dx / r; + const double ry = dy / r; + const double phix = -ry; + const double phiy = rx; + px_gen_ = pr * rx + pphi * phix; + py_gen_ = pr * ry + pphi * phiy; + } else { + px_gen_ = pr; + py_gen_ = pphi; + } + + pdgId_ = modelSet.pdgId; + generationMode_ = modelSet.useTwoStageModel ? 2 : 1; + summaryIndex_ = static_cast(chosenSourceIndex); + + mass_gen_ = pdt_->particle(pdgId_).mass(); + const CLHEP::Hep3Vector momParticle(px_gen_, py_gen_, pz_gen_); + const CLHEP::Hep3Vector posParticle(x_gen_, y_gen_, z_gen_); + const double eTotal = std::sqrt(momParticle.mag2() + mass_gen_ * mass_gen_); + E_gen_ = eTotal - mass_gen_; + const CLHEP::HepLorentzVector fourMomParticle(eTotal, momParticle); + + output->emplace_back( + PDGCode::type(pdgId_), + GenId::STMDownStreamGenTool, + posParticle, + fourMomParticle, + t_gen_ + ); + + event.put(std::move(output)); + + if (doROOTDump_) { + outTree_->Fill(); + } + } + +} // namespace mu2e + +DEFINE_ART_MODULE(mu2e::VDResamplerGenerateMix) diff --git a/STMMC/src/VDResamplerTrain_module.cc b/STMMC/src/VDResamplerTrain_module.cc new file mode 100644 index 0000000000..14c0f6d843 --- /dev/null +++ b/STMMC/src/VDResamplerTrain_module.cc @@ -0,0 +1,397 @@ +// VDResamplerTrain_module.cc +// For StepPointMCs on the designated virtual detectors, train either: +// 1) a two-stage score-based diffusion model with +// Stage 1: unconditional model for (t', x', y') +// Stage 2: conditional model for (p_r', p_phi', p_z' | t', x', y') +// 2) a single unconditional score-based diffusion model for +// (t', x', y', p_r', p_phi', p_z') +// and store the trained model parameters in CSV files. +// note that p_z are filtered and only hits with positive p_z are kept +// Yongyi Wu, Mar. 2026 + +// stdlib includes +#include +#include +#include + +#include "Offline/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh" + +#include "CLHEP/Random/RandFlat.h" +#include "CLHEP/Random/RandGaussQ.h" + +// art includes +#include "art/Framework/Core/EDAnalyzer.h" +#include "art/Framework/Principal/Event.h" +#include "art/Framework/Principal/Handle.h" +#include "art/Framework/Principal/Run.h" +#include "art/Framework/Services/Optional/RandomNumberGenerator.h" +#include "art/Framework/Services/Registry/ServiceHandle.h" + +// exception handling +#include "cetlib_except/exception.h" + +// fhicl includes +#include "canvas/Utilities/InputTag.h" +#include "fhiclcpp/ParameterSet.h" +#include "fhiclcpp/types/Atom.h" + +// message handling +#include "messagefacility/MessageLogger/MessageLogger.h" + +// Offline includes +#include "Offline/GlobalConstantsService/inc/GlobalConstantsHandle.hh" +#include "Offline/GlobalConstantsService/inc/ParticleDataList.hh" +#include "Offline/MCDataProducts/inc/SimParticle.hh" +#include "Offline/MCDataProducts/inc/StepPointMC.hh" +#include "Offline/SeedService/inc/SeedService.hh" +#include "Offline/STMMC/inc/VDResamplerTransformDefaults.hh" + +typedef cet::map_vector_key key_type; +typedef unsigned long VolumeId_type; + +namespace mu2e { + class VDResamplerTrain : public art::EDAnalyzer { + public: + using Name=fhicl::Name; + using Comment=fhicl::Comment; + struct Config { + fhicl::Atom StepPointMCsTag{ Name("StepPointMCsTag"), Comment("Tag identifying the StepPointMCs")}; + fhicl::Atom SimParticlemvTag{ Name("SimParticlemvTag"), Comment("Tag identifying the SimParticlemv")}; + fhicl::Atom SBDMuseTwoStageTraining{ Name("SBDMuseTwoStageTraining"), Comment("If true, train the two-stage factorized model. If false, train all 6 dimensions at once."), true}; + fhicl::Atom SBDMallAtOnceModelFile{ Name("SBDMallAtOnceModelFile"), Comment("CSV filename for the all-at-once 6D SBDM model parameters"), ""}; + fhicl::Atom SBDMstage1ModelFile{ Name("SBDMstage1ModelFile"), Comment("CSV filename for the trained stage-1 SBDM model parameters"), ""}; + fhicl::Atom SBDMstage2ModelFile{ Name("SBDMstage2ModelFile"), Comment("CSV filename for the trained stage-2 SBDM model parameters"), ""}; + fhicl::Atom VirtualDetectorID{ Name("VirtualDetectorID"), Comment("ID of the virtual detector to train on"), 116}; + fhicl::Atom VDz0{ Name("VDz0"), Comment("z coordinate of the virtual detector"), 37700.39}; + fhicl::Atom VDr{ Name("VDr"), Comment("VD radius"), 2000.0 }; + fhicl::Atom pdgID{ Name("pdgID"), Comment("pdgID of the particle to train on"), 22}; + fhicl::Atom SBDMhidden{ Name("SBDMhidden"), Comment("Size of hidden layers in the SBDM neural network"), 128}; + fhicl::Atom SBDMlayers{ Name("SBDMlayers"), Comment("Number of layers in the SBDM neural network"), 4}; + fhicl::Atom SBDMoptimizer{ Name("SBDMoptimizer"), Comment("Optimizer for training the SBDM neural network (SGD or ADAM)"), "ADAM"}; + fhicl::Atom SBDMadamBeta1{ Name("SBDMadamBeta1"), Comment("Adam optimizer beta1 parameter"), 0.9}; + fhicl::Atom SBDMadamBeta2{ Name("SBDMadamBeta2"), Comment("Adam optimizer beta2 parameter"), 0.999}; + fhicl::Atom SBDMadamEps{ Name("SBDMadamEps"), Comment("Adam optimizer epsilon parameter"), 1e-8}; + fhicl::Atom SBDMnoiseSchedule{ Name("SBDMnoiseSchedule"), Comment("Noise schedule for the SBDM (LINEAR or COSINE)"), "COSINE"}; + fhicl::Atom SBDMbetaMin{ Name("SBDMbetaMin"), Comment("Minimum noise schedule parameter (for LINEAR schedule)") , 1e-4}; + fhicl::Atom SBDMbetaMax{ Name("SBDMbetaMax"), Comment("Maximum noise schedule parameter (for LINEAR schedule)"), 0.02}; + fhicl::Atom SBDMcosineOffset{ Name("SBDMcosineOffset"), Comment("Offset parameter (for cosine schedule)"), 0.008}; + fhicl::Atom SBDMbatchSize{ Name("SBDMbatchSize"), Comment("Batch size for training the SBDM"), 32}; + fhicl::Atom SBDMgradientClip{ Name("SBDMgradientClip"), Comment("Gradient clipping threshold for training the SBDM"), 1.0}; + fhicl::Atom SBDMlearningRate{ Name("SBDMlearningRate"), Comment("Learning rate for training the SBDM"), 1e-3}; + fhicl::Atom SBDMdiffusionSteps{ Name("SBDMdiffusionSteps"), Comment("Number of steps in the diffusion process for the SBDM"), 200}; + fhicl::Atom SBDMtrainingSize{ Name("SBDMtrainingSize"), Comment("Size of the training data for the SBDM"), -1}; // -1 means use all available data + fhicl::Atom SBDMtrainingEpochs{ Name("SBDMtrainingEpochs"), Comment("Number of epochs to train the selected SBDM mode"), 10}; + }; + using Parameters = art::EDAnalyzer::Table; + explicit VDResamplerTrain(const Parameters& conf); + void analyze(const art::Event& e); + void endJob(); + private: + art::RandomNumberGenerator::base_engine_t& engine; + CLHEP::RandFlat randFlat_; + CLHEP::RandGaussQ randGaussQ_; + art::ProductToken StepPointMCsToken; + art::ProductToken SimParticlemvToken; + + // SBDM model + data + bool useTwoStageTraining = true; + std::unique_ptr allAtOnceModel; + std::unique_ptr stage1Model; + std::unique_ptr stage2Model; + std::vector allAtOnceTrainingData; + std::vector stage1TrainingData; + std::vector stage2TrainingData; + std::string SBDMallAtOnceModelFile; + std::string SBDMstage1ModelFile; + std::string SBDMstage2ModelFile; + VolumeId_type VirtualDetectorID = 0; + double VDz0 = 0.0; + double VDr = 0.0; + int pdgID = 0; + int trainingEpochs = 0; + int trainingSize = -1; + + // variables to read from the art event + int stepPdgId = 0; + double x = 0.0, y = 0.0, z = 0.0, time = 0.0; + double px = 0.0, py = 0.0, pz = 0.0; + VolumeId_type virtualdetectorId = 0; + + // transform variables for training data preparation + double x0 = vdresampler::kX0; + double y0 = vdresampler::kY0; + // time scaling + double t0 = vdresampler::kT0; + double tScale = vdresampler::kTScale; + }; + + VDResamplerTrain::VDResamplerTrain(const Parameters& conf) : + art::EDAnalyzer(conf), + engine(createEngine( art::ServiceHandle()->getSeed())), + randFlat_(engine), + randGaussQ_(engine), + StepPointMCsToken(consumes(conf().StepPointMCsTag())), + SimParticlemvToken(consumes(conf().SimParticlemvTag())), + useTwoStageTraining(conf().SBDMuseTwoStageTraining()), + SBDMallAtOnceModelFile(conf().SBDMallAtOnceModelFile()), + SBDMstage1ModelFile(conf().SBDMstage1ModelFile()), + SBDMstage2ModelFile(conf().SBDMstage2ModelFile()), + VirtualDetectorID(conf().VirtualDetectorID()), + VDz0(conf().VDz0()), + VDr(conf().VDr()), + pdgID(conf().pdgID()), + trainingEpochs(conf().SBDMtrainingEpochs()), + trainingSize(conf().SBDMtrainingSize()) + { + // optimizer selection + ScoreBasedDiffusionModel::OptimizerType opt = + (conf().SBDMoptimizer() == "SGD") ? + ScoreBasedDiffusionModel::OptimizerType::SGD : + ScoreBasedDiffusionModel::OptimizerType::ADAM; + + // noise schedule + ScoreBasedDiffusionModel::NoiseScheduleType sched = + (conf().SBDMnoiseSchedule() == "LINEAR") ? + ScoreBasedDiffusionModel::NoiseScheduleType::LINEAR : + ScoreBasedDiffusionModel::NoiseScheduleType::COSINE; + + if (useTwoStageTraining) { + // create stage-1 model for (t', x', y') + stage1Model = std::make_unique( + randFlat_, + randGaussQ_, + 3, // dim + 0, // conditionDim + conf().SBDMhidden(), + conf().SBDMlayers(), + opt, + conf().SBDMadamBeta1(), + conf().SBDMadamBeta2(), + conf().SBDMadamEps(), + sched, + conf().SBDMbetaMin(), + conf().SBDMbetaMax(), + conf().SBDMcosineOffset(), + conf().SBDMbatchSize(), + conf().SBDMgradientClip(), + conf().SBDMlearningRate(), + conf().SBDMdiffusionSteps() + ); + + // create stage-2 model for (p_r', p_phi', p_z' | t', x', y') + stage2Model = std::make_unique( + randFlat_, + randGaussQ_, + 3, // dim + 3, // conditionDim + conf().SBDMhidden(), + conf().SBDMlayers(), + opt, + conf().SBDMadamBeta1(), + conf().SBDMadamBeta2(), + conf().SBDMadamEps(), + sched, + conf().SBDMbetaMin(), + conf().SBDMbetaMax(), + conf().SBDMcosineOffset(), + conf().SBDMbatchSize(), + conf().SBDMgradientClip(), + conf().SBDMlearningRate(), + conf().SBDMdiffusionSteps() + ); + // allocate memory according to the training size (if specified, otherwise will grow dynamically) + if (trainingSize > 0) { + stage1TrainingData.reserve(trainingSize); + stage2TrainingData.reserve(trainingSize); + } else { + stage1TrainingData.reserve(1000); + stage2TrainingData.reserve(1000); + } + } else { + allAtOnceModel = std::make_unique( + randFlat_, + randGaussQ_, + 6, // dim + 0, // conditionDim + conf().SBDMhidden(), + conf().SBDMlayers(), + opt, + conf().SBDMadamBeta1(), + conf().SBDMadamBeta2(), + conf().SBDMadamEps(), + sched, + conf().SBDMbetaMin(), + conf().SBDMbetaMax(), + conf().SBDMcosineOffset(), + conf().SBDMbatchSize(), + conf().SBDMgradientClip(), + conf().SBDMlearningRate(), + conf().SBDMdiffusionSteps() + ); + // allocate memory according to the training size (if specified, otherwise will grow dynamically) + if (trainingSize > 0) { + allAtOnceTrainingData.reserve(trainingSize); + } else { + allAtOnceTrainingData.reserve(1000); + } + } + }; + + void VDResamplerTrain::analyze(const art::Event& event) { + // Get the data products from the event + auto const& StepPointMCs = event.getProduct(StepPointMCsToken); + if (StepPointMCs.empty()) + return; + auto const& SimParticles = event.getProduct(SimParticlemvToken); + if (SimParticles.empty()) + return; + + // Loop over all VD hits + for (const StepPointMC& step : StepPointMCs) { + // Get the associated particle + const SimParticle& particle = SimParticles.at(step.trackId()); + + // Extract the parameters + stepPdgId = particle.pdgId(); + virtualdetectorId = step.virtualDetectorId(); + time = step.time(); + x = step.position().x(); // This coordinate is in Mu2e frame, will be shifted to relative x w.r.t. the beamline + y = step.position().y(); + z = step.position().z(); + px = step.momentum().x(); + py = step.momentum().y(); + pz = step.momentum().z(); + + if (virtualdetectorId != VirtualDetectorID || (stepPdgId != pdgID && pdgID != 0) || pz <= 0) + continue; // Filter hits based on the virtual detector ID, particle type, and pz + + // as z maybe slightly different from the nominal VDz0 due to the step size, we will extrapolate the (x, y) coordinates + // to the nominal VDz0 for all hits to compute the training parameters to be fed into the SBDM. + double extrapolationFactor = (VDz0 - z) / pz; // Assuming linear motion, this is the factor to extrapolate from current z to z0 + double x_extrapolated = x + extrapolationFactor * px; + double y_extrapolated = y + extrapolationFactor * py; + + // Now we have the extrapolated (x, y) at the nominal VDz0, we can compute the training parameters for the SBDM. + // We convert (t, x_extrapolated, y_extrapolated, px, py, pz) to (t', x_extrapolated, y_extrapolated, p_r', p_phi', p_z') + + // shift to detector-centered coordinates + double dx = x_extrapolated - x0; + double dy = y_extrapolated - y0; + // polar coordinates + double r = std::sqrt(dx*dx + dy*dy); + double rho = r / VDr; + // numerical safety (avoid rho >= 1) + const double eps = 1e-6; + rho = std::min(rho, 1.0 - eps); + // boundary-removing transform + // u = atanh(r/R) + double u = 0.5 * std::log((1.0 + rho) / (1.0 - rho)); + // angle + double theta = std::atan2(dy, dx); + // map back to Cartesian-like coordinates + double x_trans = u * std::cos(theta); + double y_trans = u * std::sin(theta); + + // compute momentum components in the local polar coordinate system (r, phi, z) + double pr = 0.0; + double pphi = 0.0; + if (r > 1e-6) { // avoid division by zero, if r is very small, we can approximate pr ~ px and pphi ~ py + double rx = dx / r; // unit vector in the radial direction + double ry = dy / r; + double phix = -ry; // unit vector in the angular direction) + double phiy = rx; + pr = px*rx + py*ry; // radial momentum component + pphi = px*phix + py*phiy; + } + else { + pr = px; + pphi = py; + } + // momentum scaling + double p0 = vdresampler::kP0; // tunable scale where I want best resolution + double pr_t = std::asinh(pr / p0); + double pphi_t = std::asinh(pphi / p0); + double pz_t = std::asinh(pz / p0); + + // time transform + double t_safe = std::max(time, 0.1); // avoid log(0) + double t_trans = std::log(t_safe / t0) / tScale; + + if (useTwoStageTraining) { + DiffusionTrainingSample stage1Sample; + stage1Sample.x = {t_trans, x_trans, y_trans}; + stage1TrainingData.push_back(std::move(stage1Sample)); + + DiffusionTrainingSample stage2Sample; + stage2Sample.x = {pr_t, pphi_t, pz_t}; + stage2Sample.cond = {t_trans, x_trans, y_trans}; + stage2TrainingData.push_back(std::move(stage2Sample)); + } else { + DiffusionTrainingSample allAtOnceSample; + allAtOnceSample.x = {t_trans, x_trans, y_trans, pr_t, pphi_t, pz_t}; + allAtOnceTrainingData.push_back(std::move(allAtOnceSample)); + } + }; + return; + }; + + void VDResamplerTrain::endJob() { + + if (useTwoStageTraining && (stage1TrainingData.empty() || stage2TrainingData.empty())) { + mf::LogWarning("VDResamplerTrain") << "No training data collected."; + return; + } + if (!useTwoStageTraining && allAtOnceTrainingData.empty()) { + mf::LogWarning("VDResamplerTrain") << "No training data collected."; + return; + } + + // if SBDMtrainingSize is set and smaller than the collected training data, + // truncate the training data to the specified size. + if (useTwoStageTraining) { + if(trainingSize > 0 && (int)stage1TrainingData.size() > trainingSize) + stage1TrainingData.resize(trainingSize); + if(trainingSize > 0 && (int)stage2TrainingData.size() > trainingSize) + stage2TrainingData.resize(trainingSize); + + if (SBDMstage1ModelFile.empty() || SBDMstage2ModelFile.empty()) { + throw cet::exception("VDResamplerTrain") << "Two-stage training requires both SBDMstage1ModelFile and SBDMstage2ModelFile."; + } + + mf::LogInfo("VDResamplerTrain") + << "Training stage-1 diffusion model with " << stage1TrainingData.size() + << " samples and " << trainingEpochs << " epochs..."; + stage1Model->train(stage1TrainingData, trainingEpochs); + stage1Model->saveModel(SBDMstage1ModelFile); + mf::LogInfo("VDResamplerTrain") << "Stage-1 model saved to " << SBDMstage1ModelFile; + + mf::LogInfo("VDResamplerTrain") + << "Training stage-2 diffusion model with " << stage2TrainingData.size() + << " samples and " << trainingEpochs << " epochs..."; + stage2Model->train(stage2TrainingData, trainingEpochs); + stage2Model->saveModel(SBDMstage2ModelFile); + mf::LogInfo("VDResamplerTrain") << "Stage-2 model saved to " << SBDMstage2ModelFile; + + } else { + if(trainingSize > 0 && (int)allAtOnceTrainingData.size() > trainingSize) + allAtOnceTrainingData.resize(trainingSize); + + if (SBDMallAtOnceModelFile.empty()) { + throw cet::exception("VDResamplerTrain") << "All-at-once training requires SBDMallAtOnceModelFile."; + } + + mf::LogInfo("VDResamplerTrain") + << "Training all-at-once diffusion model with " << allAtOnceTrainingData.size() + << " samples and " << trainingEpochs << " epochs..."; + allAtOnceModel->train(allAtOnceTrainingData, trainingEpochs); + allAtOnceModel->saveModel(SBDMallAtOnceModelFile); + + mf::LogInfo("VDResamplerTrain") << "All-at-once model saved to " << SBDMallAtOnceModelFile; + } + + return; + }; + +}; // end namespace mu2e + +DEFINE_ART_MODULE(mu2e::VDResamplerTrain) From 9a255ebd5b640999a9ab4b6b5d73bf3bce28dd28 Mon Sep 17 00:00:00 2001 From: YongyiBWu <47263079+YongyiBWu@users.noreply.github.com> Date: Sun, 12 Apr 2026 14:16:43 -0400 Subject: [PATCH 2/4] Update STMMC/src/VDResamplerConfigure_module.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- STMMC/src/VDResamplerConfigure_module.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/STMMC/src/VDResamplerConfigure_module.cc b/STMMC/src/VDResamplerConfigure_module.cc index 82db041969..97e40f14df 100644 --- a/STMMC/src/VDResamplerConfigure_module.cc +++ b/STMMC/src/VDResamplerConfigure_module.cc @@ -214,7 +214,14 @@ namespace mu2e { // Generate the fcl file for training for this particle type // if pdgID is negative, we will use "m" instead of "-" in the filename to avoid issues with file naming std::string pdgIdstr = (part.first < 0) ? "m" + std::to_string(-part.first) : std::to_string(part.first); - std::string moduleName = "VDResamplerTrainVD"+ std::to_string(VirtualDetectorID) + dataSourceTag + "pdg" + pdgIdstr; + std::string sanitizedDataSourceTag = dataSourceTag; + for (char& c : sanitizedDataSourceTag) { + const bool isAlphaNum = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); + if (!isAlphaNum && c != '_') { + c = '_'; + } + } + std::string moduleName = "VDResamplerTrainVD"+ std::to_string(VirtualDetectorID) + sanitizedDataSourceTag + "pdg" + pdgIdstr; std::string pathName = "trainPathVD" + std::to_string(VirtualDetectorID) + "pdg" + pdgIdstr; trainingPaths.push_back({moduleName, pathName}); fclOutFile << " " << moduleName << " : {\n" From 92e3c8cb47c9d0a479bc223ce36245909b61064b Mon Sep 17 00:00:00 2001 From: YongyiBWu <47263079+YongyiBWu@users.noreply.github.com> Date: Mon, 13 Apr 2026 00:49:18 -0400 Subject: [PATCH 3/4] Fixes in response to PR #1798 code reviews Introduce VDResamplerTransforms (new header) and remove legacy VDResamplerTransformDefaults; centralize forward/inverse sample transforms and numeric safety constants. Harden ScoreBasedDiffusionModel by: adding initializeRandomWeights ctor flag, using size_t for trainingSampleSize_, initializing layer buffers safely, guarding weight initialization, adding dimension checks in computeLoss, improving gradient/Adam math/formatting, fixing gradient clipping early-return, validating and reconstructing loaded network shapes (loadModel uses initializeRandomWeights=false), and various style/robustness fixes. Update STMMC modules (Configure/GenerateFromModel/GenerateMix/Train) to use the new transforms, add runtime validation for VDr/VDz0, improve config parsing with warnings, initialize pointers, replace inlined transform code with calls into VDResampler, and minor API/constructor fixes (HepLorentzVector usage). Misc: remove unused typedefs, add small constants (kTwoStageTrainingHitThreshold), includes, and other small cleanups to improve stability and correctness. --- .../inc/ScoreBasedDiffusionModel.hh | 43 ++-- .../src/ScoreBasedDiffusionModel.cc | 216 +++++++++++------- STMMC/inc/VDResamplerTransformDefaults.hh | 13 -- STMMC/inc/VDResamplerTransforms.hh | 148 ++++++++++++ STMMC/src/VDResamplerConfigure_module.cc | 9 +- .../VDResamplerGenerateFromModel_module.cc | 77 ++++--- STMMC/src/VDResamplerGenerateMix_module.cc | 73 +++--- STMMC/src/VDResamplerTrain_module.cc | 134 +++++------ 8 files changed, 456 insertions(+), 257 deletions(-) delete mode 100644 STMMC/inc/VDResamplerTransformDefaults.hh create mode 100644 STMMC/inc/VDResamplerTransforms.hh diff --git a/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh b/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh index 2bf4c7c697..a7cfd78659 100644 --- a/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh +++ b/MachineLearningTools/inc/ScoreBasedDiffusionModel.hh @@ -45,24 +45,26 @@ namespace mu2e{ // Constructor: Initialize diffusion model with CLHEP random distributions. // // Parameters: - // randFlat - Reference to CLHEP RandFlat for uniform sampling (externally managed) - // randGaussQ - Reference to CLHEP RandGaussQ for Gaussian noise (externally managed) - // dim - Dimensionality of the state space - // conditionDim - Dimensionality of the optional conditioning vector (default: 0 for unconditional model) - // hidden - Size of hidden layers in the neural network - // layers - Number of layers in the network - // optimizerType - Type of optimizer to use (SGD or ADAM, default: ADAM) - // adamBeta1 - Adam optimizer beta1 parameter (default: 0.9) - // adamBeta2 - Adam optimizer beta2 parameter (default: 0.999) - // adamEps - Adam optimizer epsilon parameter (default: 1e-8) - // scheduleType - Type of noise schedule (LINEAR or COSINE, default: COSINE) - // betaMin - Minimum noise schedule parameter (for LINEAR schedule, default: 1e-4) - // betaMax - Maximum noise schedule parameter (for LINEAR schedule, default: 0.02) - // cosineOffset - Offset parameter (for cosine schedule, default: 0.008) - // batchSize - Batch size for training (default: 32) - // gradientClipThreshold - Threshold for gradient clipping (default: 1.0) - // learningRate - Learning rate for training (default: 1e-3) - // diffusionSteps - Number of steps in the diffusion process (default: 200) + // randFlat - Reference to CLHEP RandFlat for uniform sampling (externally managed) + // randGaussQ - Reference to CLHEP RandGaussQ for Gaussian noise (externally managed) + // dim - Dimensionality of the state space + // conditionDim - Dimensionality of the optional conditioning vector (default: 0 for unconditional model) + // hidden - Size of hidden layers in the neural network + // layers - Number of layers in the network + // optimizerType - Type of optimizer to use (SGD or ADAM, default: ADAM) + // adamBeta1 - Adam optimizer beta1 parameter (default: 0.9) + // adamBeta2 - Adam optimizer beta2 parameter (default: 0.999) + // adamEps - Adam optimizer epsilon parameter (default: 1e-8) + // scheduleType - Type of noise schedule (LINEAR or COSINE, default: COSINE) + // betaMin - Minimum noise schedule parameter (for LINEAR schedule, default: 1e-4) + // betaMax - Maximum noise schedule parameter (for LINEAR schedule, default: 0.02) + // cosineOffset - Offset parameter (for cosine schedule, default: 0.008) + // batchSize - Batch size for training (default: 32) + // gradientClipThreshold - Threshold for gradient clipping (default: 1.0) + // learningRate - Learning rate for training (default: 1e-3) + // diffusionSteps - Number of steps in the diffusion process (default: 200) + // initializeRandomWeights - If true, initialize network weights from random Gaussian draws. + // Set false when constructing (loading) from a saved model. ScoreBasedDiffusionModel( // Network architecture parameters CLHEP::RandFlat& randFlat, @@ -86,7 +88,8 @@ namespace mu2e{ double gradientClipThreshold = 1.0, double learningRate = 1e-3, // Diffusion process configuration - int diffusionSteps = 200 + int diffusionSteps = 200, + bool initializeRandomWeights = true ); // Train the score network on a batch of samples. @@ -312,7 +315,7 @@ namespace mu2e{ // Training state double runningLoss_; // Accumulated loss for monitoring during training int adamStep_; // Step counter for Adam optimizer (used to compute bias-corrected moment estimates) - int trainingSampleSize_; // Total number of training samples + size_t trainingSampleSize_; // Total number of training samples // Container for tracking training loss over epochs std::vector epochLosses_; diff --git a/MachineLearningTools/src/ScoreBasedDiffusionModel.cc b/MachineLearningTools/src/ScoreBasedDiffusionModel.cc index 5cf8b2dc34..31ed371129 100644 --- a/MachineLearningTools/src/ScoreBasedDiffusionModel.cc +++ b/MachineLearningTools/src/ScoreBasedDiffusionModel.cc @@ -56,7 +56,8 @@ namespace mu2e { int batchSize, double gradientClipThreshold, double learningRate, - int diffusionSteps + int diffusionSteps, + bool initializeRandomWeights ) : randFlat_(randFlat), randGaussQ_(randGaussQ), dim_(dim), conditionDim_(conditionDim), hidden_(hidden), layers_(layers), optimizerType_(optimizerType), @@ -102,10 +103,10 @@ namespace mu2e { Layer layer; // Allocate weights - layer.W.resize(out, std::vector(in)); + layer.W.resize(out, std::vector(in, 0.0)); // Allocate biases - layer.b.resize(out); + layer.b.resize(out, 0.0); // Allocate gradient buffers layer.gradW.resize(out, std::vector(in, 0.0)); @@ -118,18 +119,19 @@ namespace mu2e { layer.mb.resize(out, 0.0); layer.vb.resize(out, 0.0); - // -------------------------------------------------------- - // Weight initialization - // - // Small Gaussian initialization improves training stability - // -------------------------------------------------------- - - for (int i = 0; i < out; ++i) { - for (int j = 0; j < in; ++j) { - layer.W[i][j] = weightInitScale * randGaussQ_.fire(); - // this generates a Gaussian random number with mean=0 and sigma=weightInitScale + if (initializeRandomWeights) { + // -------------------------------------------------------- + // Weight initialization + // + // Small Gaussian initialization improves training stability + // -------------------------------------------------------- + for (int i = 0; i < out; ++i) { + for (int j = 0; j < in; ++j) { + layer.W[i][j] = weightInitScale * randGaussQ_.fire(); + // this generates a Gaussian random number with mean=0 and sigma=weightInitScale + } + layer.b[i] = 0.0; } - layer.b[i] = 0.0; } if (layer.W.empty() || layer.W[0].size() != static_cast(in) || //Check that weight matrix has correct input dimension @@ -227,10 +229,10 @@ namespace mu2e { // Apply activation function (SiLU) to get the output of the current layer. std::vector y(z.size()); - if(l != network_.size()-1) // No activation on the last layer (output layer), as it predicts the score directly. - { - for (size_t i=0;i gradZ(grad.size()); // For the output layer, the gradient is directly from the loss w.r.t. output (no activation function). - if(l == static_cast(network_.size())-1) - { + if (l == static_cast(network_.size()) - 1) { gradZ = grad; - } - // For hidden layers, apply the derivative of the activation function (SiLU) to the gradient. - else - { - for (size_t i=0;i