From 19a996ad45ea4cee333c22322e0a5118a327970b Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Fri, 2 Nov 2018 17:25:05 -0400 Subject: [PATCH 1/2] ENH: Update ImageToHistogram to use modern dynamic threading. Change-Id: I979069f0ae1fdde9175a1f86853378db4c86ca5b --- .../include/itkImageToHistogramFilter.h | 14 +- .../include/itkImageToHistogramFilter.hxx | 178 +++++++----------- .../include/itkMaskedImageToHistogramFilter.h | 4 +- .../itkMaskedImageToHistogramFilter.hxx | 42 ++++- 4 files changed, 113 insertions(+), 125 deletions(-) diff --git a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h index 8fd4779d8ef..30ceb25021c 100644 --- a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h +++ b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h @@ -122,23 +122,25 @@ class ITK_TEMPLATE_EXPORT ImageToHistogramFilter:public ImageTransformer void GenerateData() override; void BeforeThreadedGenerateData() override; - void ThreadedGenerateData(const RegionType & inputRegionForThread, ThreadIdType threadId) override; void AfterThreadedGenerateData() override; - static ITK_THREAD_RETURN_FUNCTION_CALL_CONVENTION ThreaderMinMaxCallback(void *arg); /** Method that construct the outputs */ using DataObjectPointerArraySizeType = ProcessObject::DataObjectPointerArraySizeType; using Superclass::MakeOutput; DataObject::Pointer MakeOutput(DataObjectPointerArraySizeType) override; - virtual void ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread, ThreadIdType threadId ); + virtual void ThreadedComputeHistogram(const RegionType &); + virtual void ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread ); - std::vector< HistogramPointer > m_Histograms; - std::vector< HistogramMeasurementVectorType > m_Minimums; - std::vector< HistogramMeasurementVectorType > m_Maximums; + std::mutex m_Mutex; + + HistogramMeasurementVectorType m_Minimum; + HistogramMeasurementVectorType m_Maximum; private: void ApplyMarginalScale( HistogramMeasurementVectorType & min, HistogramMeasurementVectorType & max, HistogramSizeType & size ); + + }; } // end of namespace Statistics } // end of namespace itk diff --git a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx index 6601192ac92..0ca3caa844d 100644 --- a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx +++ b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx @@ -103,19 +103,12 @@ ImageToHistogramFilter< TImage > this->AllocateOutputs(); this->BeforeThreadedGenerateData(); - m_Histograms[0] = this->GetOutput(); // just use the main one - m_Histograms[0]->SetClipBinsAtEnds(true); - for (unsigned i=1; iSetClipBinsAtEnds(true); - } + HistogramType *outputHistogram = this->GetOutput(); + outputHistogram->SetClipBinsAtEnds(true); // the parameter needed to initialize the histogram - unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); HistogramSizeType size(nbOfComponents); - HistogramMeasurementVectorType min(nbOfComponents); - HistogramMeasurementVectorType max(nbOfComponents); if( this->GetHistogramSizeInput() ) { // user provided value @@ -128,108 +121,67 @@ ImageToHistogramFilter< TImage > } this->UpdateProgress(0.01f); - //HistogramType * hist = m_Histograms[threadId]; if( this->GetAutoMinimumMaximumInput() && this->GetAutoMinimumMaximum() ) { // we have to compute the minimum and maximum values - this->ClassicMultiThread(this->ThreaderMinMaxCallback); //calls ThreadedComputeMinimumAndMaximum + this->GetMultiThreader()-> template ParallelizeImageRegion( + this->GetInput()->GetRequestedRegion(), + [this](const RegionType & inputRegionForThread) + {this->ThreadedComputeMinimumAndMaximum(inputRegionForThread);}, + this); + this->UpdateProgress(0.3f); - min = m_Minimums[0]; - max = m_Maximums[0]; - for( unsigned int t=1; tApplyMarginalScale( min, max, size ); + this->ApplyMarginalScale( m_Minimum, m_Maximum, size ); + } else { if( this->GetHistogramBinMinimumInput() ) { - min = this->GetHistogramBinMinimum(); + m_Minimum = this->GetHistogramBinMinimum(); } else { - min.Fill( NumericTraits::NonpositiveMin() - 0.5 ); + m_Minimum.Fill( NumericTraits::NonpositiveMin() - 0.5 ); } if( this->GetHistogramBinMaximumInput() ) { - max = this->GetHistogramBinMaximum(); + m_Maximum = this->GetHistogramBinMaximum(); } else { - max.Fill( NumericTraits::max() + 0.5 ); + m_Maximum.Fill( NumericTraits::max() + 0.5 ); // this->ApplyMarginalScale( min, max, size ); } } - // finally, initialize the histograms - for (unsigned i=0; iSetMeasurementVectorSize(nbOfComponents); - m_Histograms[i]->Initialize(size, min, max); - } + outputHistogram->SetMeasurementVectorSize(nbOfComponents); + outputHistogram->Initialize(size, m_Minimum, m_Maximum); - this->ClassicMultiThread(this->ThreaderCallback); //parallelizes ThreadedGenerateData + this->GetMultiThreader()-> template ParallelizeImageRegion( + this->GetInput()->GetRequestedRegion(), + [this](const RegionType & inputRegionForThread) + {this->ThreadedComputeHistogram(inputRegionForThread);}, + this); this->UpdateProgress(0.8f); this->AfterThreadedGenerateData(); this->UpdateProgress(1.0f); } - -template< typename TImage > -ITK_THREAD_RETURN_FUNCTION_CALL_CONVENTION -ImageToHistogramFilter< TImage > -::ThreaderMinMaxCallback(void *arg) -{ - using ThreadInfo = MultiThreaderBase::WorkUnitInfo; - ThreadInfo * threadInfo = static_cast(arg); - ThreadIdType threadId = threadInfo->WorkUnitID; - ThreadIdType threadCount = threadInfo->NumberOfWorkUnits; - using FilterStruct = typename ImageTransformer< TImage >::ThreadStruct; - FilterStruct* str = (FilterStruct *)(threadInfo->UserData); - Self* filter = static_cast(str->Filter.GetPointer()); - - // execute the actual method with appropriate output region - // first find out how many pieces extent can be split into. - typename TImage::RegionType splitRegion; - ThreadIdType total = filter->SplitRequestedRegion(threadId, threadCount, splitRegion); - - if ( threadId < total ) - { - filter->ThreadedComputeMinimumAndMaximum(splitRegion, threadId); - } - // else don't use this thread. Threads were not split conveniently. - return ITK_THREAD_RETURN_DEFAULT_VALUE; -} - template< typename TImage > void ImageToHistogramFilter< TImage > ::BeforeThreadedGenerateData() { - // find the actual number of threads - long nbOfThreads = this->GetNumberOfWorkUnits(); - this->GetMultiThreader()->SetNumberOfWorkUnits( nbOfThreads ); - // number of work units could be clamped, so get actual number - nbOfThreads = this->GetMultiThreader()->GetNumberOfWorkUnits(); - // number of work units can be constrained by the region size, - // so call the SplitRequestedRegion - // to get the real number of threads which will be used - RegionType splitRegion; // dummy region - just to call the following method - nbOfThreads = this->SplitRequestedRegion( 0, nbOfThreads, splitRegion ); - - // and allocate one histogram per thread - m_Histograms.resize(nbOfThreads); - m_Minimums.resize(nbOfThreads); - m_Maximums.resize(nbOfThreads); + const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + m_Minimum = HistogramMeasurementVectorType( nbOfComponents ); + m_Maximum = HistogramMeasurementVectorType( nbOfComponents ); + + m_Minimum.Fill( NumericTraits::max() ); + m_Maximum.Fill( NumericTraits::NonpositiveMin() ); } template< typename TImage > @@ -237,36 +189,15 @@ void ImageToHistogramFilter< TImage > ::AfterThreadedGenerateData() { - // group the results in the output histogram - HistogramType * hist = m_Histograms[0]; - typename HistogramType::IndexType index; - for( unsigned int i=1; iBegin(); - HistogramIterator end = m_Histograms[i]->End(); - while ( hit != end ) - { - hist->GetIndex( hit.GetMeasurementVector(), index); - hist->IncreaseFrequencyOfIndex( index, hit.GetFrequency() ); - ++hit; - } - } - - // and drop the temporary histograms - m_Histograms.clear(); - m_Minimums.clear(); - m_Maximums.clear(); } template< typename TImage > void ImageToHistogramFilter< TImage > -::ThreadedComputeMinimumAndMaximum(const RegionType & inputRegionForThread, ThreadIdType threadId ) +::ThreadedComputeMinimumAndMaximum(const RegionType & inputRegionForThread) { - unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); HistogramMeasurementVectorType min( nbOfComponents ); HistogramMeasurementVectorType max( nbOfComponents ); @@ -287,16 +218,29 @@ ImageToHistogramFilter< TImage > } ++inputIt; } - m_Minimums[threadId] = min; - m_Maximums[threadId] = max; + + std::lock_guard mutexHolder( m_Mutex ); + for( unsigned int i=0; i void ImageToHistogramFilter< TImage > -::ThreadedGenerateData(const RegionType & inputRegionForThread, ThreadIdType threadId) +::ThreadedComputeHistogram(const RegionType & inputRegionForThread ) { - unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const HistogramType *outputHistogram = this->GetOutput(); + + HistogramPointer histogram = HistogramType::New(); + histogram->SetClipBinsAtEnds(outputHistogram->GetClipBinsAtEnds()); + histogram->SetMeasurementVectorSize(nbOfComponents); + histogram->Initialize(outputHistogram->GetSize(), m_Minimum, m_Maximum); + + ImageRegionConstIterator< TImage > inputIt( this->GetInput(), inputRegionForThread ); inputIt.GoToBegin(); HistogramMeasurementVectorType m( nbOfComponents ); @@ -306,10 +250,27 @@ ImageToHistogramFilter< TImage > { const PixelType & p = inputIt.Get(); NumericTraits::AssignToArray( p, m ); - m_Histograms[threadId]->GetIndex( m, index ); - m_Histograms[threadId]->IncreaseFrequencyOfIndex( index, 1 ); + histogram->GetIndex( m, index ); + histogram->IncreaseFrequencyOfIndex( index, 1 ); ++inputIt; } + + + std::lock_guard mutexHolder( m_Mutex ); + + // reduce the results in the output histogram + HistogramType *writableOutputHistogram = this->GetOutput(); + + using HistogramIterator = typename HistogramType::ConstIterator; + + HistogramIterator hit = histogram->Begin(); + HistogramIterator end = histogram->End(); + while ( hit != end ) + { + writableOutputHistogram->GetIndex( hit.GetMeasurementVector(), index); + writableOutputHistogram->IncreaseFrequencyOfIndex( index, hit.GetFrequency() ); + ++hit; + } } template< typename TImage > @@ -317,7 +278,7 @@ void ImageToHistogramFilter< TImage > ::ApplyMarginalScale( HistogramMeasurementVectorType & min, HistogramMeasurementVectorType & max, HistogramSizeType & size ) { - unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); bool clipHistograms = true; for ( unsigned int i = 0; i < nbOfComponents; i++ ) { @@ -376,10 +337,7 @@ ImageToHistogramFilter< TImage > } if( clipHistograms == false ) { - for( unsigned int i=0; iSetClipBinsAtEnds(false); - } + this->GetOutput()->SetClipBinsAtEnds(false); } } diff --git a/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.h b/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.h index b39d60e766a..98fb2dee89d 100644 --- a/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.h +++ b/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.h @@ -84,8 +84,8 @@ class ITK_TEMPLATE_EXPORT MaskedImageToHistogramFilter:public ImageToHistogramFi MaskedImageToHistogramFilter(); ~MaskedImageToHistogramFilter() override = default; - void ThreadedGenerateData( const RegionType & inputRegionForThread, ThreadIdType threadId ) override; - void ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread, ThreadIdType threadId ) override; + void ThreadedComputeHistogram( const RegionType & inputRegionForThread ) override; + void ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread) override; }; } // end of namespace Statistics } // end of namespace itk diff --git a/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx b/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx index eeca0d34c6d..985f47e0065 100644 --- a/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx +++ b/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx @@ -37,7 +37,7 @@ MaskedImageToHistogramFilter< TImage, TMaskImage > template< typename TImage, typename TMaskImage > void MaskedImageToHistogramFilter< TImage, TMaskImage > -::ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread, ThreadIdType threadId ) +::ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread ) { unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); HistogramMeasurementVectorType min( nbOfComponents ); @@ -68,16 +68,27 @@ MaskedImageToHistogramFilter< TImage, TMaskImage > ++inputIt; ++maskIt; } - this->m_Minimums[threadId] = min; - this->m_Maximums[threadId] = max; + std::lock_guard mutexHolder( this->m_Mutex ); + for( unsigned int i=0; im_Minimum[i] = std::min( this->m_Minimum[i], min[i] ); + this->m_Maximum[i] = std::max( this->m_Maximum[i], max[i] ); + } } template< typename TImage, typename TMaskImage > void MaskedImageToHistogramFilter< TImage, TMaskImage > -::ThreadedGenerateData(const RegionType & inputRegionForThread, ThreadIdType threadId) +::ThreadedComputeHistogram(const RegionType & inputRegionForThread) { - unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); + const HistogramType *outputHistogram = this->GetOutput(); + + HistogramPointer histogram = HistogramType::New(); + histogram->SetClipBinsAtEnds(outputHistogram->GetClipBinsAtEnds()); + histogram->SetMeasurementVectorSize(nbOfComponents); + histogram->Initialize(outputHistogram->GetSize(), this->m_Minimum, this->m_Maximum); + ImageRegionConstIterator< TImage > inputIt( this->GetInput(), inputRegionForThread ); ImageRegionConstIterator< TMaskImage > maskIt( this->GetMaskImage(), inputRegionForThread ); inputIt.GoToBegin(); @@ -92,12 +103,29 @@ MaskedImageToHistogramFilter< TImage, TMaskImage > { const PixelType & p = inputIt.Get(); NumericTraits::AssignToArray( p, m ); - this->m_Histograms[threadId]->GetIndex( m, index ); - this->m_Histograms[threadId]->IncreaseFrequencyOfIndex( index, 1 ); + histogram->GetIndex( m, index ); + histogram->IncreaseFrequencyOfIndex( index, 1 ); } ++inputIt; ++maskIt; } + + + std::lock_guard mutexHolder( this->m_Mutex ); + + // reduce the results in the output histogram + HistogramType *writableOutputHistogram = this->GetOutput(); + + using HistogramIterator = typename HistogramType::ConstIterator; + + HistogramIterator hit = histogram->Begin(); + HistogramIterator end = histogram->End(); + while ( hit != end ) + { + writableOutputHistogram->GetIndex( hit.GetMeasurementVector(), index); + writableOutputHistogram->IncreaseFrequencyOfIndex( index, hit.GetFrequency() ); + ++hit; + } } } // end of namespace Statistics From ad0a25d6035d3bf5d21fa1caccbfc09ab3577065 Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Mon, 5 Nov 2018 12:41:02 -0500 Subject: [PATCH 2/2] PERF: implement concurrent histogram merge/reduce This reduces the expected complexity of merging histograms form O(n) to O(lg(n)), where n is the number of work units the image is split into. Change-Id: Iee81714bfa14615fa50439b4f32e403191f62392 --- .../include/itkImageToHistogramFilter.h | 5 ++ .../include/itkImageToHistogramFilter.hxx | 60 +++++++++++++++---- .../itkMaskedImageToHistogramFilter.hxx | 16 +---- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h index 30ceb25021c..f24c40d6a64 100644 --- a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h +++ b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.h @@ -132,8 +132,13 @@ class ITK_TEMPLATE_EXPORT ImageToHistogramFilter:public ImageTransformer virtual void ThreadedComputeHistogram(const RegionType &); virtual void ThreadedComputeMinimumAndMaximum( const RegionType & inputRegionForThread ); + + virtual void ThreadedMergeHistogram( HistogramType *histogram ); + std::mutex m_Mutex; + HistogramPointer m_MergeHistogram; + HistogramMeasurementVectorType m_Minimum; HistogramMeasurementVectorType m_Maximum; diff --git a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx index 0ca3caa844d..8d03b8da646 100644 --- a/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx +++ b/Modules/Numerics/Statistics/include/itkImageToHistogramFilter.hxx @@ -153,8 +153,8 @@ ImageToHistogramFilter< TImage > else { m_Maximum.Fill( NumericTraits::max() + 0.5 ); - // this->ApplyMarginalScale( min, max, size ); } + // No marginal scaling is applied in this case } outputHistogram->SetMeasurementVectorSize(nbOfComponents); @@ -182,6 +182,8 @@ ImageToHistogramFilter< TImage > m_Minimum.Fill( NumericTraits::max() ); m_Maximum.Fill( NumericTraits::NonpositiveMin() ); + + m_MergeHistogram = nullptr; } template< typename TImage > @@ -189,6 +191,9 @@ void ImageToHistogramFilter< TImage > ::AfterThreadedGenerateData() { + HistogramType *outputHistogram = this->GetOutput(); + outputHistogram->Graft(m_MergeHistogram); + m_MergeHistogram = nullptr; } @@ -230,7 +235,7 @@ ImageToHistogramFilter< TImage > template< typename TImage > void ImageToHistogramFilter< TImage > -::ThreadedComputeHistogram(const RegionType & inputRegionForThread ) +::ThreadedComputeHistogram(const RegionType & inputRegionForThread) { const unsigned int nbOfComponents = this->GetInput()->GetNumberOfComponentsPerPixel(); const HistogramType *outputHistogram = this->GetOutput(); @@ -255,21 +260,50 @@ ImageToHistogramFilter< TImage > ++inputIt; } + this->ThreadedMergeHistogram( histogram ); +} - std::lock_guard mutexHolder( m_Mutex ); +template< typename TImage > +void +ImageToHistogramFilter< TImage > +::ThreadedMergeHistogram(HistogramType *histogram) +{ + while (true) + { - // reduce the results in the output histogram - HistogramType *writableOutputHistogram = this->GetOutput(); + std::unique_lock lock(m_Mutex); - using HistogramIterator = typename HistogramType::ConstIterator; + if (m_MergeHistogram.IsNull()) + { + m_MergeHistogram = std::move(histogram); + return; + } + else + { - HistogramIterator hit = histogram->Begin(); - HistogramIterator end = histogram->End(); - while ( hit != end ) - { - writableOutputHistogram->GetIndex( hit.GetMeasurementVector(), index); - writableOutputHistogram->IncreaseFrequencyOfIndex( index, hit.GetFrequency() ); - ++hit; + // merge/reduce the local results with current values in m_MergeHistogram + + // take ownership locally + HistogramPointer tomergeHistogram; + swap( m_MergeHistogram, tomergeHistogram ); + + // allow other threads to merge data + lock.unlock(); + + using HistogramIterator = typename HistogramType::ConstIterator; + + HistogramIterator hit = tomergeHistogram->Begin(); + HistogramIterator end = tomergeHistogram->End(); + + typename HistogramType::IndexType index; + + while ( hit != end ) + { + histogram->GetIndex( hit.GetMeasurementVector(), index); + histogram->IncreaseFrequencyOfIndex( index, hit.GetFrequency() ); + ++hit; + } + } } } diff --git a/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx b/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx index 985f47e0065..fb36ab7dfa0 100644 --- a/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx +++ b/Modules/Numerics/Statistics/include/itkMaskedImageToHistogramFilter.hxx @@ -111,21 +111,7 @@ MaskedImageToHistogramFilter< TImage, TMaskImage > } - std::lock_guard mutexHolder( this->m_Mutex ); - - // reduce the results in the output histogram - HistogramType *writableOutputHistogram = this->GetOutput(); - - using HistogramIterator = typename HistogramType::ConstIterator; - - HistogramIterator hit = histogram->Begin(); - HistogramIterator end = histogram->End(); - while ( hit != end ) - { - writableOutputHistogram->GetIndex( hit.GetMeasurementVector(), index); - writableOutputHistogram->IncreaseFrequencyOfIndex( index, hit.GetFrequency() ); - ++hit; - } + this->ThreadedMergeHistogram( histogram ); } } // end of namespace Statistics