From 34cf190184c3688f2ce8f69cda0a7ca098ea8a4c Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sun, 2 Oct 2022 22:32:13 -0400 Subject: [PATCH 01/19] Initial implementation of HashDetector. --- scenedetect/detectors/hash_detector.py | 168 +++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 scenedetect/detectors/hash_detector.py diff --git a/scenedetect/detectors/hash_detector.py b/scenedetect/detectors/hash_detector.py new file mode 100644 index 00000000..bf072115 --- /dev/null +++ b/scenedetect/detectors/hash_detector.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# --------------------------------------------------------------- +# [ Site: http://www.bcastell.com/projects/PySceneDetect/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# [ Documentation: http://pyscenedetect.readthedocs.org/ ] +# +# Copyright (C) 2014-2022 Brandon Castellano . +# +# PySceneDetect is licensed under the BSD 3-Clause License; see the included +# LICENSE file, or visit one of the following pages for details: +# - https://github.com/Breakthrough/PySceneDetect/ +# - http://www.bcastell.com/projects/PySceneDetect/ +# +# This software uses Numpy, OpenCV, click, tqdm, simpletable, and pytest. +# See the included LICENSE files or one of the above URLs for more information. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# + +""" ``scenedetect.detectors.hash_detector`` Module + +This module implements the :py:class:`HashDetector`, which calculates a hash +value for each from of a video using a perceptual hashing algorithm. Then, the +differences in hash value between frames is calculated. If this difference +exceeds a set threshold, a scene cut is triggered. + +This detector is available from the command-line interface by using the +`detect-hash` command. +""" + +# Third-Party Library Imports +import numpy +import cv2 + +# PySceneDetect Library Imports +from scenedetect.scene_detector import SceneDetector + + +class HashDetector(SceneDetector): + """Detects cuts using a perceptual hashing algorithm. For more information + on the perceptual hashing algorithm see references below. + + 1. https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html + 2. https://github.com/JohannesBuchner/imagehash + + Since the difference between frames is used, unlike the ThresholdDetector, + only fast cuts are detected with this method. 
+ """ + + def __init__(self, threshold=100.0, min_scene_len=15, hash_size=16, highfreq_factor=2): + super(HashDetector, self).__init__() + self.threshold = threshold + # Minimum length of any given scene, in frames (int) or FrameTimecode + self.min_scene_len = min_scene_len + # Size of square of low frequency data to include from the discrete cosine transform + self.hash_size = hash_size + # How much high frequency data should be thrown out from the DCT + # A value of 2 means only keep 1/2 of the freq data, a value of 4 means only keep 1/4 + self.highfreq_factor = highfreq_factor + self.last_frame = None + self.last_scene_cut = None + self.last_hash = numpy.array([]) + self._metric_keys = ['hash_dist'] + self.cli_name = 'detect-hash' + + def get_metrics(self): + return self._metric_keys + + def process_frame(self, frame_num, frame_img): + """ Similar to ContentDetector, but using a perceptual hashing algorithm + to calculate a hash for each frame and then calculate a hash difference + frame to frame. + + Arguments: + frame_num (int): Frame number of frame that is being passed. + + frame_img (Optional[int]): Decoded frame image (numpy.ndarray) to perform scene + detection on. Can be None *only* if the self.is_processing_required() method + (inhereted from the base SceneDetector class) returns True. + + Returns: + List[int]: List of frames where scene cuts have been detected. There may be 0 + or more frames in the list, and not necessarily the same as frame_num. + """ + + cut_list = [] + metric_keys = self._metric_keys + _unused = '' + + # Initialize last scene cut point at the beginning of the frames of interest. + if self.last_scene_cut is None: + self.last_scene_cut = frame_num + + # We can only start detecting once we have a frame to compare with. + if self.last_frame is not None: + # We obtain the change in hash value between subsequent frames as + # well as the actual hash value. This is refered to in a statsfile + # as their respective metric keys. 
+ if (self.stats_manager is not None and + self.stats_manager.metrics_exist(frame_num, metric_keys)): + hash_dist = self.stats_manager.get_metrics(frame_num, metric_keys) + else: + # Perceptual hashing algorithm based on phash, updated to use OpenCV instead of PIL + scipy + # https://github.com/JohannesBuchner/imagehash + + # Convert to grayscale + curr_gray = cv2.cvtColor(frame_img, cv2.COLOR_BGR2GRAY) + # Resize image to square to help with DCT + imsize = self.hash_size * self.highfreq_factor + curr_resized = cv2.resize(curr_gray, (imsize, imsize), interpolation=cv2.INTER_AREA) + # Calculate discrete cosine tranformation of the image + curr_resized = numpy.float32(curr_resized) / numpy.max(numpy.max(curr_resized)) + curr_dct = cv2.dct(curr_resized) + # Only keep the low frequency information + curr_dct_low_freq = curr_dct[:self.hash_size, :self.hash_size] + # Calculate the median of the low frequency information + curr_med = numpy.median(curr_dct_low_freq) + # Transform the low frequency information into a binary image based on > or < median + curr_hash = curr_dct_low_freq > curr_med + + last_hash = self.last_hash + + if last_hash.size == 0: + # Calculate hash as above + last_gray = cv2.cvtColor(self.last_frame, cv2.COLOR_BGR2GRAY) + last_resized = cv2.resize(last_gray, (imsize, imsize), interpolation=cv2.INTER_AREA) + last_resized = numpy.float32(last_resized) / numpy.max(numpy.max(last_resized)) + last_dct = cv2.dct(last_resized) + last_dct_low_freq = last_dct[:self.hash_size, :self.hash_size] + last_med = numpy.median(last_dct_low_freq) + last_hash = last_dct_low_freq > last_med + + # Hamming distance is calculated to compare to last frame + hash_dist = numpy.count_nonzero(curr_hash.flatten() != last_hash.flatten()) + + if self.stats_manager is not None: + self.stats_manager.set_metrics(frame_num, { + metric_keys[0]: hash_dist}) + + self.last_hash = curr_hash + + + # We consider any frame over the threshold a new scene, but only if + # the minimum scene length 
has been reached (otherwise it is ignored). + if hash_dist >= self.threshold and ( + (frame_num - self.last_scene_cut) >= self.min_scene_len): + cut_list.append(frame_num) + self.last_scene_cut = frame_num + + if self.last_frame is not None and self.last_frame is not _unused: + del self.last_frame + + # If we have the next frame computed, don't copy the current frame + # into last_frame since we won't use it on the next call anyways. + if (self.stats_manager is not None and + self.stats_manager.metrics_exist(frame_num+1, metric_keys)): + self.last_frame = _unused + else: + self.last_frame = frame_img.copy() + + return cut_list \ No newline at end of file From f05b68012c84cb9557863d9c08d6b4f661dca7d4 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sun, 2 Oct 2022 22:36:21 -0400 Subject: [PATCH 02/19] Added cli and init calls. --- scenedetect/__init__.py | 2 +- scenedetect/cli/__init__.py | 41 +++++++++++++++++++++++++++++++ scenedetect/detectors/__init__.py | 1 + 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/scenedetect/__init__.py b/scenedetect/__init__.py index a5e5be68..dada1cb6 100644 --- a/scenedetect/__init__.py +++ b/scenedetect/__init__.py @@ -43,7 +43,7 @@ from scenedetect.video_stream import VideoStream, VideoOpenFailure from scenedetect.backends import AVAILABLE_BACKENDS, VideoStreamCv2, VideoStreamAv from scenedetect.stats_manager import StatsManager, StatsFileCorrupt -from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector +from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector, HashDetector from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge from scenedetect.platform import init_logger diff --git a/scenedetect/cli/__init__.py b/scenedetect/cli/__init__.py index dfa32a7c..bf006ded 100644 --- a/scenedetect/cli/__init__.py +++ b/scenedetect/cli/__init__.py @@ -613,6 +613,47 @@ def detect_threshold_command( ) +@click.command('detect-hash') +@click.option( 
+ '--threshold', '-t', metavar='VAL', + type=click.FLOAT, default=100.0, show_default=True, help= + 'Threshold value (float) that the hash_dist metric must exceed to trigger' + ' a new scene. Refers to frame metric hash_dist in the stats file.') +@click.option( + '--size', '-s', metavar='VAL', + type=click.IntRange(min=2), default=16, show_default=True, help= + 'Size of the hash used in the perceptual hasing algorithm. Must be an ' + 'integer >=2.') +@click.option( + '--freq_factor', '-f', metavar='VAL', + type=click.IntRange(min=1), default=2, show_default=True, help= + 'Parameter used to specify the amount of high frequency image information ' + 'used for the perceptual hashing algorithm. A high value uses less high ' + 'frequency image information, meaning that the algorithm is less sensitive ' + 'to small changes. A low value causes the algorithm to be more sensitive to' + ' small changes. Must be an integer >0.') +@click.pass_context +def detect_hash_command(ctx, threshold, size, freq_factor): + """ Perform perceptual hashing based scene detection on input video(s). + detect-hash + detect-hash --threshold 27.5 + detect-hash --threshold 100 --size 16 --freq_factor 2 + """ + + min_scene_len = 0 if ctx.obj.drop_short_scenes else ctx.obj.min_scene_len + + logging.debug('Detecting scenes using hash detector. 
parameters:\n' + ' threshold: %d, min-scene-len: %d, hash-size: %d,' + ' freq-factor: %d', threshold, min_scene_len, size, freq_factor) + + # Initialize the detector and add it to the scene manager + ctx.obj.add_detector(scenedetect.detectors.HashDetector( + threshold=threshold, + min_scene_len=min_scene_len, + hash_size=size, + highfreq_factor=freq_factor)) + + @click.command('export-html') @click.option( '--filename', diff --git a/scenedetect/detectors/__init__.py b/scenedetect/detectors/__init__.py index d274eeec..869a9595 100644 --- a/scenedetect/detectors/__init__.py +++ b/scenedetect/detectors/__init__.py @@ -76,6 +76,7 @@ from scenedetect.detectors.content_detector import ContentDetector from scenedetect.detectors.threshold_detector import ThresholdDetector from scenedetect.detectors.adaptive_detector import AdaptiveDetector +from scenedetect.detectors.hash_detector import HashDetector # Algorithms being ported: #from scenedetect.detectors.motion_detector import MotionDetector From 44003d4ca565cfbd503798386aec9cfb6d758c4c Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sun, 2 Oct 2022 23:21:21 -0400 Subject: [PATCH 03/19] Fixing cli command. --- scenedetect/cli/__init__.py | 44 ++++++++++++++++++++++++++----------- scenedetect/cli/config.py | 6 +++++ scenedetect/cli/context.py | 40 +++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 13 deletions(-) diff --git a/scenedetect/cli/__init__.py b/scenedetect/cli/__init__.py index bf006ded..5fa89b9e 100644 --- a/scenedetect/cli/__init__.py +++ b/scenedetect/cli/__init__.py @@ -22,6 +22,7 @@ """ import logging +from optparse import Option from typing import AnyStr, Optional import click @@ -618,12 +619,14 @@ def detect_threshold_command( '--threshold', '-t', metavar='VAL', type=click.FLOAT, default=100.0, show_default=True, help= 'Threshold value (float) that the hash_dist metric must exceed to trigger' - ' a new scene. Refers to frame metric hash_dist in the stats file.') + ' a new scene. 
Refers to frame metric hash_dist in the stats file.' +) @click.option( '--size', '-s', metavar='VAL', type=click.IntRange(min=2), default=16, show_default=True, help= 'Size of the hash used in the perceptual hasing algorithm. Must be an ' - 'integer >=2.') + 'integer >=2.' +) @click.option( '--freq_factor', '-f', metavar='VAL', type=click.IntRange(min=1), default=2, show_default=True, help= @@ -631,27 +634,41 @@ def detect_threshold_command( 'used for the perceptual hashing algorithm. A high value uses less high ' 'frequency image information, meaning that the algorithm is less sensitive ' 'to small changes. A low value causes the algorithm to be more sensitive to' - ' small changes. Must be an integer >0.') + ' small changes. Must be an integer >0.' +) +@click.option( + '--min-scene-len', + '-m', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Minimum length of any scene. Overrides global min-scene-len (-m) setting.' + ' TIMECODE can be specified as exact number of frames, a time in seconds followed by s,' + ' or a timecode in the format HH:MM:SS or HH:MM:SS.nnn.%s' % + ('' if USER_CONFIG.is_default('detect-hash', 'min-scene-len') else + USER_CONFIG.get_help_string('detect-hash', 'min-scene-len')) +) @click.pass_context -def detect_hash_command(ctx, threshold, size, freq_factor): +def detect_hash_command( + ctx: click.Context, + threshold: Optional[float], + size: Optional[int], + freq_factor: Optional[int], + min_scene_len: Optional[str] + ): """ Perform perceptual hashing based scene detection on input video(s). detect-hash detect-hash --threshold 27.5 detect-hash --threshold 100 --size 16 --freq_factor 2 """ + assert isinstance(ctx.obj, CliContext) - min_scene_len = 0 if ctx.obj.drop_short_scenes else ctx.obj.min_scene_len - - logging.debug('Detecting scenes using hash detector. 
parameters:\n' - ' threshold: %d, min-scene-len: %d, hash-size: %d,' - ' freq-factor: %d', threshold, min_scene_len, size, freq_factor) - - # Initialize the detector and add it to the scene manager - ctx.obj.add_detector(scenedetect.detectors.HashDetector( + ctx.obj.handle_detect_hash( threshold=threshold, min_scene_len=min_scene_len, hash_size=size, - highfreq_factor=freq_factor)) + highfreq_factor=freq_factor + ) @click.command('export-html') @@ -1063,3 +1080,4 @@ def save_images_command( _add_cli_command(scenedetect_cli, detect_content_command) _add_cli_command(scenedetect_cli, detect_threshold_command) _add_cli_command(scenedetect_cli, detect_adaptive_command) +_add_cli_command(scenedetect_cli, detect_hash_command) diff --git a/scenedetect/cli/config.py b/scenedetect/cli/config.py index 891a8d80..650f9efa 100644 --- a/scenedetect/cli/config.py +++ b/scenedetect/cli/config.py @@ -96,6 +96,12 @@ def __str__(self) -> str: 'min-scene-len': TimecodeValue(0), 'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0), }, + 'detect-hash': { + 'threshold': RangeValue(100, min_val=0.0, max_val=65536.0), + 'size': RangeValue(16, min_val=2, max_val=65536), + 'freq_factor': RangeValue(2, min_val=1, max_val=65536), + 'min_scene_len': TimecodeValue(0) + }, 'export-html': { 'filename': '$VIDEO_NAME-Scenes.html', 'image-height': 0, diff --git a/scenedetect/cli/context.py b/scenedetect/cli/context.py index cb678819..5783e1e3 100644 --- a/scenedetect/cli/context.py +++ b/scenedetect/cli/context.py @@ -396,6 +396,46 @@ def handle_detect_threshold( )) self.options_processed = options_processed_orig + + def handle_detect_hash( + self, + threshold: Optional[float], + min_scene_len: Optional[str], + hash_size: Optional[int], + highfreq_factor: Optional[int] + ): + """Handle detect-hash command options.""" + self._check_input_open() + options_processed_orig = self.options_processed + self.options_processed = False + + if self.drop_short_scenes: + min_scene_len = 0 + else: + if 
min_scene_len is None: + if self.config.is_default("detect-hash", "min-scene-len"): + min_scene_len = self.min_scene_len.frame_num + else: + min_scene_len = self.config.get_value("detect-hash", "min-scene-len") + min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + + threshold = self.config.get_value("detect-hash", "threshold", threshold) + hash_size = self.config.get_value("detect-hash", "size", hash_size) + highfreq_factor = self.config.get_value("detect-hash", "freq_factor", highfreq_factor) + + logger.debug("Adding detector: HashDetector(threshold=%f, min_scene_len=%d," + " hash_size=%d, highfreq_factor=%d)", threshold, min_scene_len, hash_size, highfreq_factor) + + self._add_detector( + scenedetect.detectors.HashDetector( + threshold=threshold, + min_scene_len=min_scene_len, + hash_size=hash_size, + highfreq_factor=highfreq_factor + ) + ) + + self.options_processed = options_processed_orig def handle_export_html( self, From 9ff494c306d28a3083805457a97d08f7fd4d515c Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sun, 2 Oct 2022 23:23:19 -0400 Subject: [PATCH 04/19] Cleaned up extra import. --- scenedetect/cli/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scenedetect/cli/__init__.py b/scenedetect/cli/__init__.py index 5fa89b9e..3f6d6bd1 100644 --- a/scenedetect/cli/__init__.py +++ b/scenedetect/cli/__init__.py @@ -22,7 +22,6 @@ """ import logging -from optparse import Option from typing import AnyStr, Optional import click From cb45225adbe9ff411276f91ae10f9e1da8004b50 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sat, 8 Oct 2022 00:22:10 -0400 Subject: [PATCH 05/19] Formatting updates. 
--- scenedetect/__init__.py | 4 +- scenedetect/cli/__init__.py | 56 +++++++++++++------------- scenedetect/cli/context.py | 25 +++++------- scenedetect/detectors/hash_detector.py | 40 +++++++++--------- 4 files changed, 60 insertions(+), 65 deletions(-) diff --git a/scenedetect/__init__.py b/scenedetect/__init__.py index 47784cda..975cba10 100644 --- a/scenedetect/__init__.py +++ b/scenedetect/__init__.py @@ -39,8 +39,8 @@ from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge from scenedetect.scene_detector import SceneDetector from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector, HashDetector -from scenedetect.backends import (AVAILABLE_BACKENDS, VideoStreamCv2, VideoStreamAv, VideoStreamMoviePy, - VideoCaptureAdapter) +from scenedetect.backends import (AVAILABLE_BACKENDS, VideoStreamCv2, VideoStreamAv, + VideoStreamMoviePy, VideoCaptureAdapter) from scenedetect.stats_manager import StatsManager, StatsFileCorrupt from scenedetect.scene_manager import SceneManager, save_images diff --git a/scenedetect/cli/__init__.py b/scenedetect/cli/__init__.py index 3fb82285..1a632d3d 100644 --- a/scenedetect/cli/__init__.py +++ b/scenedetect/cli/__init__.py @@ -712,26 +712,35 @@ def detect_threshold_command( @click.command('detect-hash') @click.option( - '--threshold', '-t', metavar='VAL', - type=click.FLOAT, default=100.0, show_default=True, help= - 'Threshold value (float) that the hash_dist metric must exceed to trigger' - ' a new scene. Refers to frame metric hash_dist in the stats file.' -) + '--threshold', + '-t', + metavar='VAL', + type=click.FLOAT, + default=100.0, + show_default=True, + help='Threshold value (float) that the hash_dist metric must exceed to trigger' + ' a new scene. Refers to frame metric hash_dist in the stats file.') @click.option( - '--size', '-s', metavar='VAL', - type=click.IntRange(min=2), default=16, show_default=True, help= - 'Size of the hash used in the perceptual hasing algorithm. 
Must be an ' - 'integer >=2.' -) + '--size', + '-s', + metavar='VAL', + type=click.IntRange(min=2), + default=16, + show_default=True, + help='Size of the hash used in the perceptual hasing algorithm. Must be an ' + 'integer >=2.') @click.option( - '--freq_factor', '-f', metavar='VAL', - type=click.IntRange(min=1), default=2, show_default=True, help= - 'Parameter used to specify the amount of high frequency image information ' + '--freq_factor', + '-f', + metavar='VAL', + type=click.IntRange(min=1), + default=2, + show_default=True, + help='Parameter used to specify the amount of high frequency image information ' 'used for the perceptual hashing algorithm. A high value uses less high ' 'frequency image information, meaning that the algorithm is less sensitive ' 'to small changes. A low value causes the algorithm to be more sensitive to' - ' small changes. Must be an integer >0.' -) + ' small changes. Must be an integer >0.') @click.option( '--min-scene-len', '-m', @@ -741,17 +750,11 @@ def detect_threshold_command( help='Minimum length of any scene. Overrides global min-scene-len (-m) setting.' ' TIMECODE can be specified as exact number of frames, a time in seconds followed by s,' ' or a timecode in the format HH:MM:SS or HH:MM:SS.nnn.%s' % - ('' if USER_CONFIG.is_default('detect-hash', 'min-scene-len') else - USER_CONFIG.get_help_string('detect-hash', 'min-scene-len')) -) + ('' if USER_CONFIG.is_default('detect-hash', 'min-scene-len') else USER_CONFIG.get_help_string( + 'detect-hash', 'min-scene-len'))) @click.pass_context -def detect_hash_command( - ctx: click.Context, - threshold: Optional[float], - size: Optional[int], - freq_factor: Optional[int], - min_scene_len: Optional[str] - ): +def detect_hash_command(ctx: click.Context, threshold: Optional[float], size: Optional[int], + freq_factor: Optional[int], min_scene_len: Optional[str]): """ Perform perceptual hashing based scene detection on input video(s). 
detect-hash detect-hash --threshold 27.5 @@ -763,8 +766,7 @@ def detect_hash_command( threshold=threshold, min_scene_len=min_scene_len, hash_size=size, - highfreq_factor=freq_factor - ) + highfreq_factor=freq_factor) @click.command('export-html') diff --git a/scenedetect/cli/context.py b/scenedetect/cli/context.py index 3a4937e9..08c02372 100644 --- a/scenedetect/cli/context.py +++ b/scenedetect/cli/context.py @@ -442,14 +442,9 @@ def handle_detect_threshold( )) self.options_processed = options_processed_orig - - def handle_detect_hash( - self, - threshold: Optional[float], - min_scene_len: Optional[str], - hash_size: Optional[int], - highfreq_factor: Optional[int] - ): + + def handle_detect_hash(self, threshold: Optional[float], min_scene_len: Optional[str], + hash_size: Optional[int], highfreq_factor: Optional[int]): """Handle detect-hash command options.""" self._check_input_open() options_processed_orig = self.options_processed @@ -464,22 +459,22 @@ def handle_detect_hash( else: min_scene_len = self.config.get_value("detect-hash", "min-scene-len") min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num - + threshold = self.config.get_value("detect-hash", "threshold", threshold) hash_size = self.config.get_value("detect-hash", "size", hash_size) highfreq_factor = self.config.get_value("detect-hash", "freq_factor", highfreq_factor) - logger.debug("Adding detector: HashDetector(threshold=%f, min_scene_len=%d," - " hash_size=%d, highfreq_factor=%d)", threshold, min_scene_len, hash_size, highfreq_factor) - + logger.debug( + "Adding detector: HashDetector(threshold=%f, min_scene_len=%d," + " hash_size=%d, highfreq_factor=%d)", threshold, min_scene_len, hash_size, + highfreq_factor) + self._add_detector( scenedetect.detectors.HashDetector( threshold=threshold, min_scene_len=min_scene_len, hash_size=hash_size, - highfreq_factor=highfreq_factor - ) - ) + highfreq_factor=highfreq_factor)) self.options_processed = options_processed_orig diff 
--git a/scenedetect/detectors/hash_detector.py b/scenedetect/detectors/hash_detector.py index bf072115..a44d8fad 100644 --- a/scenedetect/detectors/hash_detector.py +++ b/scenedetect/detectors/hash_detector.py @@ -23,7 +23,6 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # - """ ``scenedetect.detectors.hash_detector`` Module This module implements the :py:class:`HashDetector`, which calculates a hash @@ -69,10 +68,10 @@ def __init__(self, threshold=100.0, min_scene_len=15, hash_size=16, highfreq_fac self.last_hash = numpy.array([]) self._metric_keys = ['hash_dist'] self.cli_name = 'detect-hash' - + def get_metrics(self): return self._metric_keys - + def process_frame(self, frame_num, frame_img): """ Similar to ContentDetector, but using a perceptual hashing algorithm to calculate a hash for each frame and then calculate a hash difference @@ -97,19 +96,19 @@ def process_frame(self, frame_num, frame_img): # Initialize last scene cut point at the beginning of the frames of interest. if self.last_scene_cut is None: self.last_scene_cut = frame_num - + # We can only start detecting once we have a frame to compare with. if self.last_frame is not None: - # We obtain the change in hash value between subsequent frames as + # We obtain the change in hash value between subsequent frames as # well as the actual hash value. This is refered to in a statsfile # as their respective metric keys. 
- if (self.stats_manager is not None and - self.stats_manager.metrics_exist(frame_num, metric_keys)): + if (self.stats_manager is not None + and self.stats_manager.metrics_exist(frame_num, metric_keys)): hash_dist = self.stats_manager.get_metrics(frame_num, metric_keys) else: # Perceptual hashing algorithm based on phash, updated to use OpenCV instead of PIL + scipy # https://github.com/JohannesBuchner/imagehash - + # Convert to grayscale curr_gray = cv2.cvtColor(frame_img, cv2.COLOR_BGR2GRAY) # Resize image to square to help with DCT @@ -124,45 +123,44 @@ def process_frame(self, frame_num, frame_img): curr_med = numpy.median(curr_dct_low_freq) # Transform the low frequency information into a binary image based on > or < median curr_hash = curr_dct_low_freq > curr_med - + last_hash = self.last_hash - + if last_hash.size == 0: # Calculate hash as above last_gray = cv2.cvtColor(self.last_frame, cv2.COLOR_BGR2GRAY) - last_resized = cv2.resize(last_gray, (imsize, imsize), interpolation=cv2.INTER_AREA) + last_resized = cv2.resize( + last_gray, (imsize, imsize), interpolation=cv2.INTER_AREA) last_resized = numpy.float32(last_resized) / numpy.max(numpy.max(last_resized)) last_dct = cv2.dct(last_resized) last_dct_low_freq = last_dct[:self.hash_size, :self.hash_size] last_med = numpy.median(last_dct_low_freq) last_hash = last_dct_low_freq > last_med - + # Hamming distance is calculated to compare to last frame hash_dist = numpy.count_nonzero(curr_hash.flatten() != last_hash.flatten()) if self.stats_manager is not None: - self.stats_manager.set_metrics(frame_num, { - metric_keys[0]: hash_dist}) - + self.stats_manager.set_metrics(frame_num, {metric_keys[0]: hash_dist}) + self.last_hash = curr_hash - # We consider any frame over the threshold a new scene, but only if # the minimum scene length has been reached (otherwise it is ignored). 
if hash_dist >= self.threshold and ( - (frame_num - self.last_scene_cut) >= self.min_scene_len): + (frame_num - self.last_scene_cut) >= self.min_scene_len): cut_list.append(frame_num) self.last_scene_cut = frame_num if self.last_frame is not None and self.last_frame is not _unused: del self.last_frame - + # If we have the next frame computed, don't copy the current frame # into last_frame since we won't use it on the next call anyways. - if (self.stats_manager is not None and - self.stats_manager.metrics_exist(frame_num+1, metric_keys)): + if (self.stats_manager is not None + and self.stats_manager.metrics_exist(frame_num + 1, metric_keys)): self.last_frame = _unused else: self.last_frame = frame_img.copy() - return cut_list \ No newline at end of file + return cut_list From 0f54efc9deab6af944cb33196e98adcad512576f Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sat, 8 Oct 2022 01:05:53 -0400 Subject: [PATCH 06/19] Added a hash calculation helper function and removed deprecated StatsManager usage. --- scenedetect/detectors/hash_detector.py | 108 +++++++++++++------------ 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/scenedetect/detectors/hash_detector.py b/scenedetect/detectors/hash_detector.py index a44d8fad..081a2620 100644 --- a/scenedetect/detectors/hash_detector.py +++ b/scenedetect/detectors/hash_detector.py @@ -42,6 +42,36 @@ from scenedetect.scene_detector import SceneDetector +def calculate_frame_hash(frame_img, hash_size, highfreq_factor): + """Helper function that calculates the hash of a frame and returns it. 
+ + Perceptual hashing algorithm based on phash, updated to use OpenCV instead of PIL + scipy + https://github.com/JohannesBuchner/imagehash + """ + + # Transform to grayscale + gray_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2GRAY) + + # Resize image to square to help with DCT + imsize = hash_size * highfreq_factor + resized_img = cv2.resize(gray_img, (imsize, imsize), interpolation=cv2.INTER_AREA) + + # Calculate discrete cosine tranformation of the image + resized_img = numpy.float32(resized_img) / numpy.max(numpy.max(resized_img)) + dct_complete = cv2.dct(resized_img) + + # Only keep the low frequency information + dct_low_freq = dct_complete[:hash_size, :hash_size] + + # Calculate the median of the low frequency informations + med = numpy.median(dct_low_freq) + + # Transform the low frequency information into a binary image based on > or < median + hash_img = dct_low_freq > med + + return hash_img + + class HashDetector(SceneDetector): """Detects cuts using a perceptual hashing algorithm. For more information on the perceptual hashing algorithm see references below. 
@@ -55,14 +85,19 @@ class HashDetector(SceneDetector): def __init__(self, threshold=100.0, min_scene_len=15, hash_size=16, highfreq_factor=2): super(HashDetector, self).__init__() + # How much of a difference between subsequent hash values should trigger a cut self.threshold = threshold + # Minimum length of any given scene, in frames (int) or FrameTimecode self.min_scene_len = min_scene_len + # Size of square of low frequency data to include from the discrete cosine transform self.hash_size = hash_size + # How much high frequency data should be thrown out from the DCT # A value of 2 means only keep 1/2 of the freq data, a value of 4 means only keep 1/4 self.highfreq_factor = highfreq_factor + self.last_frame = None self.last_scene_cut = None self.last_hash = numpy.array([]) @@ -99,51 +134,26 @@ def process_frame(self, frame_num, frame_img): # We can only start detecting once we have a frame to compare with. if self.last_frame is not None: - # We obtain the change in hash value between subsequent frames as - # well as the actual hash value. This is refered to in a statsfile - # as their respective metric keys. 
- if (self.stats_manager is not None - and self.stats_manager.metrics_exist(frame_num, metric_keys)): - hash_dist = self.stats_manager.get_metrics(frame_num, metric_keys) - else: - # Perceptual hashing algorithm based on phash, updated to use OpenCV instead of PIL + scipy - # https://github.com/JohannesBuchner/imagehash - - # Convert to grayscale - curr_gray = cv2.cvtColor(frame_img, cv2.COLOR_BGR2GRAY) - # Resize image to square to help with DCT - imsize = self.hash_size * self.highfreq_factor - curr_resized = cv2.resize(curr_gray, (imsize, imsize), interpolation=cv2.INTER_AREA) - # Calculate discrete cosine tranformation of the image - curr_resized = numpy.float32(curr_resized) / numpy.max(numpy.max(curr_resized)) - curr_dct = cv2.dct(curr_resized) - # Only keep the low frequency information - curr_dct_low_freq = curr_dct[:self.hash_size, :self.hash_size] - # Calculate the median of the low frequency information - curr_med = numpy.median(curr_dct_low_freq) - # Transform the low frequency information into a binary image based on > or < median - curr_hash = curr_dct_low_freq > curr_med - - last_hash = self.last_hash - - if last_hash.size == 0: - # Calculate hash as above - last_gray = cv2.cvtColor(self.last_frame, cv2.COLOR_BGR2GRAY) - last_resized = cv2.resize( - last_gray, (imsize, imsize), interpolation=cv2.INTER_AREA) - last_resized = numpy.float32(last_resized) / numpy.max(numpy.max(last_resized)) - last_dct = cv2.dct(last_resized) - last_dct_low_freq = last_dct[:self.hash_size, :self.hash_size] - last_med = numpy.median(last_dct_low_freq) - last_hash = last_dct_low_freq > last_med - - # Hamming distance is calculated to compare to last frame - hash_dist = numpy.count_nonzero(curr_hash.flatten() != last_hash.flatten()) - - if self.stats_manager is not None: - self.stats_manager.set_metrics(frame_num, {metric_keys[0]: hash_dist}) - - self.last_hash = curr_hash + # We obtain the change in hash value between subsequent frames. 
+ curr_hash = calculate_frame_hash( + frame_img=frame_img, hash_size=self.hash_size, highfreq_factor=self.highfreq_factor) + + last_hash = self.last_hash + + if last_hash.size == 0: + # Calculate hash of last frame + last_hash = calculate_frame_hash( + frame_img=self.last_frame, + hash_size=self.hash_size, + highfreq_factor=self.highfreq_factor) + + # Hamming distance is calculated to compare to last frame + hash_dist = numpy.count_nonzero(curr_hash.flatten() != last_hash.flatten()) + + if self.stats_manager is not None: + self.stats_manager.set_metrics(frame_num, {metric_keys[0]: hash_dist}) + + self.last_hash = curr_hash # We consider any frame over the threshold a new scene, but only if # the minimum scene length has been reached (otherwise it is ignored). @@ -155,12 +165,6 @@ def process_frame(self, frame_num, frame_img): if self.last_frame is not None and self.last_frame is not _unused: del self.last_frame - # If we have the next frame computed, don't copy the current frame - # into last_frame since we won't use it on the next call anyways. - if (self.stats_manager is not None - and self.stats_manager.metrics_exist(frame_num + 1, metric_keys)): - self.last_frame = _unused - else: - self.last_frame = frame_img.copy() + self.last_frame = frame_img.copy() return cut_list From efa4023e22f60397d94217012b97c32b42b9f5e5 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Sun, 16 Oct 2022 00:51:28 -0400 Subject: [PATCH 07/19] Redefined default values for compatibility with config files. 
--- scenedetect/cli/__init__.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/scenedetect/cli/__init__.py b/scenedetect/cli/__init__.py index 1a632d3d..1779bd83 100644 --- a/scenedetect/cli/__init__.py +++ b/scenedetect/cli/__init__.py @@ -715,32 +715,34 @@ def detect_threshold_command( '--threshold', '-t', metavar='VAL', - type=click.FLOAT, - default=100.0, - show_default=True, + type=click.FloatRange(CONFIG_MAP['detect-hash']['threshold'].min_val, + CONFIG_MAP['detect-hash']['threshold'].max_val), + default=None, help='Threshold value (float) that the hash_dist metric must exceed to trigger' - ' a new scene. Refers to frame metric hash_dist in the stats file.') + ' a new scene. Refers to frame metric hash_dist in the stats file.%s' % + (USER_CONFIG.get_help_string('detect-hash', 'threshold'))) @click.option( '--size', '-s', metavar='VAL', - type=click.IntRange(min=2), - default=16, - show_default=True, + type=click.IntRange(CONFIG_MAP['detect-hash']['size'].min_val, + CONFIG_MAP['detect-hash']['size'].max_val), + default=None, help='Size of the hash used in the perceptual hasing algorithm. Must be an ' - 'integer >=2.') + 'integer >=2.%s' % (USER_CONFIG.get_help_string('detect-hash', 'size'))) @click.option( '--freq_factor', '-f', metavar='VAL', - type=click.IntRange(min=1), - default=2, - show_default=True, + type=click.IntRange(CONFIG_MAP['detect-hash']['freq_factor'].min_val, + CONFIG_MAP['detect-hash']['freq_factor'].max_val), + default=None, help='Parameter used to specify the amount of high frequency image information ' 'used for the perceptual hashing algorithm. A high value uses less high ' 'frequency image information, meaning that the algorithm is less sensitive ' 'to small changes. A low value causes the algorithm to be more sensitive to' - ' small changes. Must be an integer >0.') + ' small changes. 
Must be an integer >0.%s' % + (USER_CONFIG.get_help_string('detect-hash', 'freq_factor'))) @click.option( '--min-scene-len', '-m', From 688d67e894100f1f965b57ddb2bcebbf28e737b8 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Mon, 17 Oct 2022 00:31:14 -0400 Subject: [PATCH 08/19] Added private function to return min_scene_len for use by detectors. --- scenedetect/cli/config.py | 2 +- scenedetect/cli/context.py | 70 +++++++++++++++++--------------------- 2 files changed, 32 insertions(+), 40 deletions(-) diff --git a/scenedetect/cli/config.py b/scenedetect/cli/config.py index 39ca4fc5..8c5cacf7 100644 --- a/scenedetect/cli/config.py +++ b/scenedetect/cli/config.py @@ -259,7 +259,7 @@ def from_config(config_value: str, default: 'KernelSizeValue') -> 'KernelSizeVal 'threshold': RangeValue(100, min_val=0.0, max_val=65536.0), 'size': RangeValue(16, min_val=2, max_val=65536), 'freq_factor': RangeValue(2, min_val=1, max_val=65536), - 'min_scene_len': TimecodeValue(0) + 'min-scene-len': TimecodeValue(0) }, 'export-html': { 'filename': '$VIDEO_NAME-Scenes.html', diff --git a/scenedetect/cli/context.py b/scenedetect/cli/context.py index 08c02372..04759a41 100644 --- a/scenedetect/cli/context.py +++ b/scenedetect/cli/context.py @@ -30,7 +30,7 @@ from scenedetect.stats_manager import StatsManager from scenedetect.scene_manager import SceneManager, Interpolation -from scenedetect.cli.config import ConfigRegistry, ConfigLoadFailure, CHOICE_MAP +from scenedetect.cli.config import CONFIG_MAP, ConfigRegistry, ConfigLoadFailure, CHOICE_MAP logger = logging.getLogger('pyscenedetect') @@ -305,15 +305,7 @@ def handle_detect_content( options_processed_orig = self.options_processed self.options_processed = False - if self.drop_short_scenes: - min_scene_len = 0 - else: - if min_scene_len is None: - if self.config.is_default('detect-content', 'min-scene-len'): - min_scene_len = self.min_scene_len.frame_num - else: - min_scene_len = self.config.get_value('detect-content', 'min-scene-len') - 
min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + min_scene_len = self._get_min_scene_len("detect-content") if weights is not None: try: @@ -363,15 +355,7 @@ def handle_detect_adaptive( self.config.config_dict["detect-adaptive"]["min-content-val"] = ( self.config.config_dict["detect-adaptive"]["min-deleta-hsv"]) - if self.drop_short_scenes: - min_scene_len = 0 - else: - if min_scene_len is None: - if self.config.is_default("detect-adaptive", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num - else: - min_scene_len = self.config.get_value("detect-adaptive", "min-scene-len") - min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + min_scene_len = self._get_min_scene_len("detect-adaptive") if weights is not None: try: @@ -412,16 +396,7 @@ def handle_detect_threshold( options_processed_orig = self.options_processed self.options_processed = False - if self.drop_short_scenes: - min_scene_len = 0 - else: - if min_scene_len is None: - if self.config.is_default("detect-threshold", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num - else: - min_scene_len = self.config.get_value("detect-threshold", "min-scene-len") - min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num - + min_scene_len = self._get_min_scene_len("detect-threshold") threshold = self.config.get_value("detect-threshold", "threshold", threshold) fade_bias = self.config.get_value("detect-threshold", "fade-bias", fade_bias) # TODO(v1.0): This cannot be disabled right now. 
@@ -450,16 +425,7 @@ def handle_detect_hash(self, threshold: Optional[float], min_scene_len: Optional options_processed_orig = self.options_processed self.options_processed = False - if self.drop_short_scenes: - min_scene_len = 0 - else: - if min_scene_len is None: - if self.config.is_default("detect-hash", "min-scene-len"): - min_scene_len = self.min_scene_len.frame_num - else: - min_scene_len = self.config.get_value("detect-hash", "min-scene-len") - min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num - + min_scene_len = self._get_min_scene_len("detect-hash") threshold = self.config.get_value("detect-hash", "threshold", threshold) hash_size = self.config.get_value("detect-hash", "size", hash_size) highfreq_factor = self.config.get_value("detect-hash", "freq_factor", highfreq_factor) @@ -888,3 +854,29 @@ def _on_duplicate_command(self, command: str) -> None: raise click.BadParameter( '\n Command %s may only be specified once.' % command, param_hint='%s command' % command) + + def _get_min_scene_len(self, command=None): + """Called when a detector needs to get the min_scene_len before initialization. + + Arguments: + command: string of the detector command e.g. 'detect-adaptive' + + Returns: + min_scene_len + """ + # Raise an error if this function is called without a valid command + assert command in CONFIG_MAP and "min-scene-len" in CONFIG_MAP[command] + + min_scene_len = None + + if self.drop_short_scenes: + min_scene_len = 0 + else: + if min_scene_len is None: + if self.config.is_default(command, "min-scene-len"): + min_scene_len = self.min_scene_len.frame_num + else: + min_scene_len = self.config.get_value(command, "min-scene-len") + min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + + return min_scene_len From a5841e4b07f16552228d2eb3cf48ceef37877eef Mon Sep 17 00:00:00 2001 From: wjs018 Date: Tue, 18 Oct 2022 02:12:21 -0400 Subject: [PATCH 09/19] Added API reference for detect-hash. 
--- manual/cli/detectors.rst | 94 ++++++++++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 13 deletions(-) diff --git a/manual/cli/detectors.rst b/manual/cli/detectors.rst index 6df39415..867a7623 100644 --- a/manual/cli/detectors.rst +++ b/manual/cli/detectors.rst @@ -5,12 +5,13 @@ Detectors *********************************************************************** -There are currently two implemented scene detection algorithms, threshold -based detection (``detect-threshold``), and content-aware detection -(``detect-content``). Each detector can be selected by adding the -respective `detect-` command, and any relevant options, after setting -the main ``scenedetect`` command global options. In general, commands -should follow the form: +There are currently four implemented scene detection algorithms, threshold +based detection (``detect-threshold``), content-aware detection +(``detect-content``), adaptive content-aware detection (``detect-adaptive``), +and perceptual hashing based detection (``detect-hash``). Each detector can be +selected by adding the respective `detect-` command, and any relevant options, +after setting the main ``scenedetect`` command global options. In general, +commands should follow the form: ``scenedetect [global options] [detector] [commands]`` @@ -138,13 +139,6 @@ Detector Options seconds followed by s, or a timecode in the format HH:MM:SS or HH:MM:SS.nnn. -Usage Examples ------------------------------------------------------------------------ - - ``detect-threshold`` - - ``detect-threshold --threshold 15`` - ======================================================================= ``detect-adaptive`` @@ -190,3 +184,77 @@ Detector Options specified as exact number of frames, a time in seconds followed by s, or a timecode in the format HH:MM:SS or HH:MM:SS.nnn. 
+ + +======================================================================= +``detect-hash`` +======================================================================= + +Perform detection using a perceptual hashing algorithm on input video. + +When processing each frame, the frame is converted into a hash and this is +compared to the previously analyzed frame. If the difference between these two +hashes exceeds the value set for `-t`/`--threshold`, then a scene change is +triggered. + +This detector is only available when using the OpenCV backend. + +The hashing algorithm used is based on the implementation of `phash `_. +The basic steps of the hashing algorithm are detailed below: + +1. The image is first converted to grayscale (meaning this detector is not +sensitive to color transitions). +2. The resulting grayscale image is then scaled down in size to a square image +with the length of each side equal to `-s`/`--size` \* `-f`/`--freq_factor`. +3. The discrete cosine transform (DCT) of the resized image is calculated. +4. Only the low frequency information from the DCT is retained. This is +accomplished by discarding all but the upper left values of the resulting DCT +matrix. The size of the resulting submatrix is set as a square with the length +of each side determined by `-s`/`--size`. +5. The median of the retained DCT information is determined. +6. The hash is calculated by converting the retained DCT matrix into a binary +array by comparing each element to the median. The resulting binary values are +True if the value is greater than the median and False if it is less than or +equal to the median. + +The metric used for scene detection is the difference between the hashes of +subsequent frames. This difference is calculated using the Hamming distance +between two hashes. This is defined as the number of elements that differ +between two hashes. This metric is recorded in the statsfile as `hash_dist` if +a statsfile is specified. 
+ +Examples: + + ``detect-hash`` + + ``detect-hash --threshold 80`` + +Detector Options +----------------------------------------------------------------------- + + -t, --threshold VAL Threshold value (float) that the calculated + frame score must exceed to trigger a new scene + (see frame metric hash_dist in stats file). + [default: 100.0] + + -s, --size VAL Hash size (int) that is used for the detector. + Larger values can help increase sensitivity to + small changes, but can increase computation + time. [default: 16] + + -f, --freq_factor VAL Frequency factor (int) used to determine how + much high frequency data is discarded in the + hashing algorithm. For example a value of 4 + corresponds to keeping only 1/4 of the + frequency information of the image (a value of + 2 would be 1/2 of the frequency information, + etc.). Smaller values make the detector more + sensitive to smaller sized features in the + frame, but can increase computation time. + [default: 2] + + -m, --min-scene-len TIMECODE Minimum length of any scene. Overrides global + min-scene-len (-m) setting. TIMECODE can be + specified as exact number of frames, a time in + seconds followed by s, or a timecode in the + format HH:MM:SS or HH:MM:SS.nnn. From 6a877db2d255f20f1bdfa5510510400890970187 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Tue, 18 Oct 2022 02:37:56 -0400 Subject: [PATCH 10/19] Updated docs with detect-hash info. --- docs/reference/command-line.md | 2 +- docs/reference/detection-methods.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/reference/command-line.md b/docs/reference/command-line.md index 80d82a6e..685e7cf9 100644 --- a/docs/reference/command-line.md +++ b/docs/reference/command-line.md @@ -13,6 +13,6 @@ The `scenedetect` command reference is available as part of [the PySceneDetect M - Exporting scene list as HTML (`export-html`) - [Detector Reference](http://scenedetect.com/projects/Manual/en/latest/cli/detectors.html): - - Detectors, e.g. 
`detect-content`, `detect-threshold`, `detect-adaptive` + - Detectors, e.g. `detect-content`, `detect-threshold`, `detect-adaptive`, `detect-hash` You can also run `scenedetect help all` locally for the full `scenedetect command reference. diff --git a/docs/reference/detection-methods.md b/docs/reference/detection-methods.md index 95ae2777..2db5eefa 100644 --- a/docs/reference/detection-methods.md +++ b/docs/reference/detection-methods.md @@ -21,6 +21,10 @@ The adaptive content detector (`detect-adaptive`) compares the difference in con The threshold-based scene detector (`detect-threshold`) is how most traditional scene detection methods work (e.g. the `ffmpeg blackframe` filter), by comparing the intensity/brightness of the current frame with a set threshold, and triggering a scene cut/break when this value crosses the threshold. In PySceneDetect, this value is computed by averaging the R, G, and B values for every pixel in the frame, yielding a single floating point number representing the average pixel value (from 0.0 to 255.0). +## Perceptual Hash Detector + +The perceptual hash detector (`detect-hash`) calculates a hash for a frame and compares that hash to the previous frame's hash. If the hashes differ by more than the defined threshold, then a scene change is recorded. The hashing algorithm used for this detector is an implementation of `phash` from the [imagehash](https://github.com/JohannesBuchner/imagehash) library. In practice, this detector works similarly to `detect-content` in that it picks up large differences between adjacent frames. One important note is that the hashing algorithm converts the frames to grayscale, so this detector is insensitive to changes in colors if the brightness remains constant. In general, this algorithm is very computationally efficient compared to `detect-content` or `detect-adaptive`, especially if downscaling is not used. 
See [here](https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html) for an overview of how a perceptual hashing algorithm can be used for detecting similarity (or otherwise) of images and a visual depiction of the algorithm. + # Creating New Detection Algorithms All scene detection algorithms must inherit from [the base `SceneDetector` class](https://scenedetect.com/projects/Manual/en/latest/api/scene_detector.html). Note that the current SceneDetector API is under development and expected to change somewhat before v1.0 is released, so make sure to pin your `scenedetect` dependency to the correct API version (e.g. `scenedetect < 0.6`, `scenedetect < 0.7`, etc...). From ea66b3711c7a5c7114a70a0cd6d871cbb53d314b Mon Sep 17 00:00:00 2001 From: wjs018 Date: Thu, 20 Oct 2022 20:27:25 -0400 Subject: [PATCH 11/19] Minor docs formatting update. --- manual/cli/detectors.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/manual/cli/detectors.rst b/manual/cli/detectors.rst index 867a7623..0cd5093c 100644 --- a/manual/cli/detectors.rst +++ b/manual/cli/detectors.rst @@ -199,8 +199,9 @@ triggered. This detector is only available when using the OpenCV backend. -The hashing algorithm used is based on the implementation of `phash `_. -The basic steps of the hashing algorithm are detailed below: +The hashing algorithm used is based on the implementation of +`phash `_. The basic steps of the +hashing algorithm are detailed below: 1. The image is first converted to grayscale (meaning this detector is not sensitive to color transitions). From 14f2c8cce6935a981a34b0c9f18438df858a99cb Mon Sep 17 00:00:00 2001 From: wjs018 Date: Thu, 20 Oct 2022 22:58:48 -0400 Subject: [PATCH 12/19] Updated default threshold. 
--- scenedetect/cli/config.py | 2 +- scenedetect/detectors/hash_detector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scenedetect/cli/config.py b/scenedetect/cli/config.py index 8c5cacf7..bc49f4bf 100644 --- a/scenedetect/cli/config.py +++ b/scenedetect/cli/config.py @@ -256,7 +256,7 @@ def from_config(config_value: str, default: 'KernelSizeValue') -> 'KernelSizeVal 'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0), }, 'detect-hash': { - 'threshold': RangeValue(100, min_val=0.0, max_val=65536.0), + 'threshold': RangeValue(101, min_val=0.0, max_val=65536.0), 'size': RangeValue(16, min_val=2, max_val=65536), 'freq_factor': RangeValue(2, min_val=1, max_val=65536), 'min-scene-len': TimecodeValue(0) diff --git a/scenedetect/detectors/hash_detector.py b/scenedetect/detectors/hash_detector.py index 081a2620..f5370096 100644 --- a/scenedetect/detectors/hash_detector.py +++ b/scenedetect/detectors/hash_detector.py @@ -83,7 +83,7 @@ class HashDetector(SceneDetector): only fast cuts are detected with this method. """ - def __init__(self, threshold=100.0, min_scene_len=15, hash_size=16, highfreq_factor=2): + def __init__(self, threshold=101.0, min_scene_len=15, hash_size=16, highfreq_factor=2): super(HashDetector, self).__init__() # How much of a difference between subsequent hash values should trigger a cut self.threshold = threshold From 88e3ad0b0ca6e65e2b36d810868236cf3c952143 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Thu, 20 Oct 2022 22:59:19 -0400 Subject: [PATCH 13/19] Added cli tests. --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 89bd6d54..4e8bb45a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -43,7 +43,7 @@ DEFAULT_TIME = '-s 2s -d 4s' # Seek forward a bit but limit the amount we process. DEFAULT_DETECTOR = 'detect-content' DEFAULT_CONFIG_FILE = 'scenedetect.cfg' # Ensure we default to a "blank" config file. 
-ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive'] +ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive', 'detect-hash'] ALL_BACKENDS = ['opencv', 'pyav', 'moviepy'] From 604d756930db116c9699ff951a347f80dcb3d5d2 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Thu, 20 Oct 2022 23:04:28 -0400 Subject: [PATCH 14/19] Updated default value in docs. --- manual/cli/detectors.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manual/cli/detectors.rst b/manual/cli/detectors.rst index 0cd5093c..c48f5869 100644 --- a/manual/cli/detectors.rst +++ b/manual/cli/detectors.rst @@ -236,7 +236,7 @@ Detector Options -t, --threshold VAL Threshold value (float) that the calculated frame score must exceed to trigger a new scene (see frame metric hash_dist in stats file). - [default: 100.0] + [default: 101.0] -s, --size VAL Hash size (int) that is used for the detector. Larger values can help increase sensitivity to From fda2fae92056ad92cc868d47965ba03f13a5ab10 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Fri, 21 Oct 2022 00:15:19 -0400 Subject: [PATCH 15/19] Updated tests to include detect-hash. 
--- scenedetect/detectors/hash_detector.py | 11 ++++++++++- tests/test_detectors.py | 26 ++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/scenedetect/detectors/hash_detector.py b/scenedetect/detectors/hash_detector.py index f5370096..f6f2a453 100644 --- a/scenedetect/detectors/hash_detector.py +++ b/scenedetect/detectors/hash_detector.py @@ -56,8 +56,14 @@ def calculate_frame_hash(frame_img, hash_size, highfreq_factor): imsize = hash_size * highfreq_factor resized_img = cv2.resize(gray_img, (imsize, imsize), interpolation=cv2.INTER_AREA) + # Check to avoid dividing by zero + max_value = numpy.max(numpy.max(resized_img)) + if max_value == 0: + # Just set the max to 1 to not change the values + max_value = 1 + # Calculate discrete cosine tranformation of the image - resized_img = numpy.float32(resized_img) / numpy.max(numpy.max(resized_img)) + resized_img = numpy.float32(resized_img) / max_value dct_complete = cv2.dct(resized_img) # Only keep the low frequency information @@ -107,6 +113,9 @@ def __init__(self, threshold=101.0, min_scene_len=15, hash_size=16, highfreq_fac def get_metrics(self): return self._metric_keys + def is_processing_required(self, frame_num): + return True + def process_frame(self, frame_num, frame_img): """ Similar to ContentDetector, but using a perceptual hashing algorithm to calculate a hash for each frame and then calculate a hash difference diff --git a/tests/test_detectors.py b/tests/test_detectors.py index 2a41d2b7..f69290ab 100644 --- a/tests/test_detectors.py +++ b/tests/test_detectors.py @@ -20,7 +20,7 @@ import time from scenedetect import detect, SceneManager, FrameTimecode, StatsManager -from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector +from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector, HashDetector from scenedetect.backends.opencv import VideoStreamCv2 # TODO(v1.0): Parameterize these tests like VideoStreams are. 
@@ -87,6 +87,28 @@ def test_adaptive_detector(test_movie_clip): assert scene_list[-1][1] == end_time +def test_hash_detector(test_movie_clip): + """ Test SceneManager with VideoStreamCv2 and HashDetector. """ + video = VideoStreamCv2(test_movie_clip) + scene_manager = SceneManager() + scene_manager.add_detector(HashDetector()) + scene_manager.auto_downscale = True + + video_fps = video.frame_rate + start_time = FrameTimecode('00:00:50', video_fps) + end_time = FrameTimecode('00:01:19', video_fps) + + video.seek(start_time) + scene_manager.detect_scenes(video=video, end_time=end_time) + + scene_list = scene_manager.get_scene_list() + assert len(scene_list) == len(TEST_MOVIE_CLIP_START_FRAMES_ACTUAL) + detected_start_frames = [timecode.get_frames() for timecode, _ in scene_list] + assert TEST_MOVIE_CLIP_START_FRAMES_ACTUAL == detected_start_frames + # Ensure last scene's end timecode matches the end time we set. + assert scene_list[-1][1] == end_time + + def test_threshold_detector(test_video_file): """ Test SceneManager with VideoStreamCv2 and ThresholdDetector. """ video = VideoStreamCv2(test_video_file) @@ -103,7 +125,7 @@ def test_threshold_detector(test_video_file): def test_detectors_with_stats(test_video_file): """ Test all detectors functionality with a StatsManager. """ # TODO(v1.0): Parameterize this test case (move fixture from cli to test config). - for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector]: + for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector, HashDetector]: video = VideoStreamCv2(test_video_file) stats = StatsManager() scene_manager = SceneManager(stats_manager=stats) From 26181d1c732d73da60aa5c19ea326c619d60bd77 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Fri, 21 Oct 2022 22:22:59 -0400 Subject: [PATCH 16/19] Updated requirements due to breaking change in PyAV. 
--- requirements.txt | 2 +- requirements_headless.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2bd81bb6..bc6dca1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ # TODO: Replace appdirs with platformdirs. appdirs av<=8.0.3; python_version <= "3.6" -av; python_version > "3.6" +av<10.0; python_version > "3.6" click moviepy numpy diff --git a/requirements_headless.txt b/requirements_headless.txt index 59a02c95..9a7f196e 100644 --- a/requirements_headless.txt +++ b/requirements_headless.txt @@ -3,7 +3,7 @@ # appdirs av<=8.0.3; python_version <= "3.6" -av; python_version > "3.6" +av<10.0; python_version > "3.6" click moviepy numpy From ff3849bddf68f40077731b44e654b8b8e24c653d Mon Sep 17 00:00:00 2001 From: wjs018 Date: Fri, 21 Oct 2022 22:40:20 -0400 Subject: [PATCH 17/19] Update appveyor CI config. --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 7347ddc1..0d2f6165 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -59,7 +59,7 @@ test_script: - python -m scenedetect version - python -m scenedetect -i tests/resources/testvideo.mp4 -b opencv detect-content time -e 2s # Test with optional PyAV backend - - python -m pip install av + - python -m pip install av<10.0 - python -m scenedetect -i tests/resources/testvideo.mp4 -b pyav detect-content time -e 2s # Cleanup - python -m pip uninstall -y scenedetect av From abeca415fab824e38db0a11d445e4a2c278761d2 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Fri, 21 Oct 2022 22:54:58 -0400 Subject: [PATCH 18/19] Updated pip commands for appveyor. 
--- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 0d2f6165..29f20215 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,7 @@ install: - python -m pip install --upgrade pip build wheel virtualenv setuptools # Make sure we get latest binary packages of the video input libraries. - - python -m pip install av opencv-python-headless --only-binary ":all:" + - python -m pip install 'av<10.0' opencv-python-headless --only-binary ":all:" # Install other PySceneDetect dependencies and checkout resources required for tests. - python -m pip install -r requirements_headless.txt @@ -59,7 +59,7 @@ test_script: - python -m scenedetect version - python -m scenedetect -i tests/resources/testvideo.mp4 -b opencv detect-content time -e 2s # Test with optional PyAV backend - - python -m pip install av<10.0 + - python -m pip install 'av<10.0' - python -m scenedetect -i tests/resources/testvideo.mp4 -b pyav detect-content time -e 2s # Cleanup - python -m pip uninstall -y scenedetect av From fd9de34ae5ad60a01d15bc17e0c7805e5ad38114 Mon Sep 17 00:00:00 2001 From: wjs018 Date: Fri, 21 Oct 2022 23:03:23 -0400 Subject: [PATCH 19/19] Correcting escape characters for Windows terminal. --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 29f20215..aa058c33 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,7 @@ install: - python -m pip install --upgrade pip build wheel virtualenv setuptools # Make sure we get latest binary packages of the video input libraries. - - python -m pip install 'av<10.0' opencv-python-headless --only-binary ":all:" + - python -m pip install "av<10.0" opencv-python-headless --only-binary ":all:" # Install other PySceneDetect dependencies and checkout resources required for tests. 
- python -m pip install -r requirements_headless.txt @@ -59,7 +59,7 @@ test_script: - python -m scenedetect version - python -m scenedetect -i tests/resources/testvideo.mp4 -b opencv detect-content time -e 2s # Test with optional PyAV backend - - python -m pip install 'av<10.0' + - python -m pip install "av<10.0" - python -m scenedetect -i tests/resources/testvideo.mp4 -b pyav detect-content time -e 2s # Cleanup - python -m pip uninstall -y scenedetect av