From cf5f4c95cffaf3a71e21128e44a041ebe1282627 Mon Sep 17 00:00:00 2001
From: Matteo Concas
Date: Mon, 9 Dec 2024 18:42:47 +0100
Subject: [PATCH] Improve logging in case of OOM

---
Reviewer notes:
- NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Context-line indentation is
  assumed, not known; verify it against the tree or apply with
  `git apply --ignore-whitespace`.
- Removed the stray ':' after the ROF span in the "Too many tracklets per
  cluster" message so it matches the "Too many cells per cluster" message.
- Removed the doubled space before "y=" in the vertex dump.
- Added a one-line comment describing TimeFrame::printROFInfo.

 .../ITS/tracking/include/ITStracking/TimeFrame.h |  1 +
 Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx  | 15 +++++++++++++++
 Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx    | 15 +++++++++++----
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
index fa4f33782d16a..0237f4ce9579b 100644
--- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
@@ -259,6 +259,7 @@ class TimeFrame
   void printCellLUTonLayer(int i);
   void printTrackletLUTs();
   void printCellLUTs();
+  void printROFInfo(const int rofId);
 
   IndexTableUtils mIndexTableUtils;
 
diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
index f614de9b9f26a..40a540015d75f 100644
--- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
+++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
@@ -608,5 +608,20 @@ void TimeFrame::printNClsPerROF()
     std::cout << std::endl;
   }
 }
+
+// Print to std::cout the number of clusters on each layer and the seeding vertices (position, contributors) of ROF rofId.
+void TimeFrame::printROFInfo(const int rofId)
+{
+  std::cout << "ROF " << rofId << " dump:" << std::endl;
+  for (int iLayer{0}; iLayer < mClusters.size(); ++iLayer) {
+    std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(rofId, iLayer).size() << " clusters." << std::endl;
+  }
+  std::cout << "Number of seeding vertices: " << getPrimaryVertices(rofId).size() << std::endl;
+  int iVertex{0};
+  for (auto& v : getPrimaryVertices(rofId)) {
+    std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." << std::endl;
+  }
+}
+
 } // namespace its
 } // namespace o2
diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
index 721452bf0361d..7b06f7c7bcc76 100644
--- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
+++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
@@ -35,6 +35,7 @@ namespace o2
 {
 namespace its
 {
+using o2::its::constants::GB;
 
 Tracker::Tracker(o2::its::TrackerTraits* traits)
 {
@@ -74,12 +75,15 @@ void Tracker::clustersToTracks(std::function logger, std::f
           &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex);
         nTracklets += mTraits->getTFNumberOfTracklets();
         if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
-          error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration));
+          mTimeFrame->printROFInfo(iROFs);
+          error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
+                            iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
           break;
         }
         float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f;
         if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) {
-          error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit));
+          error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}",
+                            trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit));
           break;
         }
 
@@ -87,12 +91,15 @@ void Tracker::clustersToTracks(std::function logger, std::f
           &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration);
         nCells += mTraits->getTFNumberOfCells();
         if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
-          error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration));
+          mTimeFrame->printROFInfo(iROFs);
+          error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
+                            iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
           break;
         }
         float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f;
         if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) {
-          error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit));
+          error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}",
+                            cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit));
           break;
         }
 