diff --git a/scenedetect.cfg b/scenedetect.cfg index 83c1d925..33a83d43 100644 --- a/scenedetect.cfg +++ b/scenedetect.cfg @@ -269,6 +269,14 @@ # Display list of cut points generated from scene boundaries (yes/no). #display-cuts = yes +# Separator to use between columns in output file. Must be a single ASCII +# character (may be written as an escape sequence, e.g. \t). +#col-separator = , + +# Separator to use between rows in output file. Must be ASCII characters +# (may be written as escape sequences, e.g. \r\n). +#row-separator = \n + # Format to use for list of cut points (frames, seconds, timecode). #cut-format = timecode diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py index c26b6263..0f02bd6c 100644 --- a/scenedetect/_cli/__init__.py +++ b/scenedetect/_cli/__init__.py @@ -1114,6 +1114,7 @@ def list_scenes_command( output_dir = ctx.config.get_value("list-scenes", "output", output) name_format = ctx.config.get_value("list-scenes", "filename", filename) list_scenes_args = { + "col_separator": ctx.config.get_value("list-scenes", "col-separator"), "cut_format": ctx.config.get_value("list-scenes", "cut-format"), "display_scenes": ctx.config.get_value("list-scenes", "display-scenes"), "display_cuts": ctx.config.get_value("list-scenes", "display-cuts"), @@ -1122,6 +1123,7 @@ def list_scenes_command( "skip_cuts": ctx.config.get_value("list-scenes", "skip-cuts", skip_cuts), "output_dir": output_dir, "quiet": ctx.config.get_value("list-scenes", "quiet", quiet) or ctx.quiet_mode, + "row_separator": ctx.config.get_value("list-scenes", "row-separator"), } ctx.add_command(cli_commands.list_scenes, list_scenes_args) diff --git a/scenedetect/_cli/commands.py b/scenedetect/_cli/commands.py index 17b6b5c9..857507d3 100644 --- a/scenedetect/_cli/commands.py +++ b/scenedetect/_cli/commands.py @@ -105,6 +105,8 @@ def list_scenes( display_scenes: bool, display_cuts: bool, cut_format: str, + col_separator: str, + row_separator: str, ): """Handles the `list-scenes` command.""" # Write scene list CSV to if required. 
@@ -125,6 +127,8 @@ def list_scenes( scene_list=scenes, include_cut_list=not skip_cuts, cut_list=cuts, + col_separator=col_separator, + row_separator=row_separator, ) # Suppress output if requested. if quiet: diff --git a/scenedetect/_cli/config.py b/scenedetect/_cli/config.py index 2236ac43..135fbe89 100644 --- a/scenedetect/_cli/config.py +++ b/scenedetect/_cli/config.py @@ -60,6 +60,12 @@ def from_config(config_value: str, default: "ValidatedValue") -> "ValidatedValue """ raise NotImplementedError() + def __repr__(self) -> str: + return str(self.value) + + def __str__(self) -> str: + return str(self.value) + class TimecodeValue(ValidatedValue): """Validator for timecode values in seconds (100.0), frames (100), or HH:MM:SS. @@ -75,12 +81,6 @@ def __init__(self, value: Union[int, float, str]): def value(self) -> Union[int, float, str]: return self._value - def __repr__(self) -> str: - return str(self.value) - - def __str__(self) -> str: - return str(self.value) - @staticmethod def from_config(config_value: str, default: "TimecodeValue") -> "TimecodeValue": try: @@ -121,12 +121,6 @@ def max_val(self) -> Union[int, float]: """Maximum value of the range.""" return self._max_val - def __repr__(self) -> str: - return str(self.value) - - def __str__(self) -> str: - return str(self.value) - @staticmethod def from_config(config_value: str, default: "RangeValue") -> "RangeValue": try: @@ -163,9 +157,6 @@ def __init__(self, value: Union[str, ContentDetector.Components]): def value(self) -> Tuple[float, float, float, float]: return self._value - def __repr__(self) -> str: - return str(self.value) - def __str__(self) -> str: return "%.3f, %.3f, %.3f, %.3f" % self.value @@ -199,9 +190,6 @@ def __init__(self, value: int): def value(self) -> int: return self._value - def __repr__(self) -> str: - return str(self.value) - def __str__(self) -> str: if self.value is None: return "auto" @@ -217,6 +205,42 @@ def from_config(config_value: str, default: "KernelSizeValue") -> 
"KernelSizeVal ) from ex +class EscapedString(ValidatedValue): + """Strings that can contain escape sequences, e.g. the literal \n.""" + + def __init__(self, value: str, length_limit: int = 0): + self._value = value.encode("utf-8").decode("unicode_escape") + if length_limit and len(self._value) > length_limit: + raise OptionParseFailure(f"Value must be no longer than {length_limit} characters.") + + @property + def value(self) -> str: + """Get the value after validation.""" + return self._value + + @staticmethod + def from_config( + config_value: str, default: "EscapedString", length_limit: int = 0 + ) -> "EscapedString": + try: + return EscapedString(config_value, length_limit) + except (UnicodeDecodeError, UnicodeEncodeError) as ex: + raise OptionParseFailure( + "Value must be valid UTF-8 string with escape characters." + ) from ex + + +class EscapedChar(EscapedString): + """Strings that can contain escape sequences but can be a maximum of 1 character in length.""" + + def __init__(self, value: str): + super().__init__(value, length_limit=1) + + @staticmethod + def from_config(config_value: str, default: "EscapedString") -> "EscapedChar": + return EscapedString.from_config(config_value, default, length_limit=1) + + class TimecodeFormat(Enum): """Format to display timecodes.""" @@ -304,10 +328,12 @@ def format(self, timecode: FrameTimecode) -> str: }, "list-scenes": { "cut-format": TimecodeFormat.TIMECODE, + "col-separator": EscapedChar(","), "display-cuts": True, "display-scenes": True, "filename": "$VIDEO_NAME-Scenes.csv", "output": None, + "row-separator": EscapedString("\n"), "no-output-file": False, "quiet": False, "skip-cuts": False, diff --git a/scenedetect/_cli/context.py b/scenedetect/_cli/context.py index 3e247259..bd8f88d6 100644 --- a/scenedetect/_cli/context.py +++ b/scenedetect/_cli/context.py @@ -215,7 +215,7 @@ def handle_options( raise click.Abort() if self.config.config_dict: - logger.debug("Current configuration:\n%s", 
str(self.config.config_dict)) + logger.debug("Current configuration:\n%s", str(self.config.config_dict).encode("utf-8")) logger.debug("Parsing program options.") if stats is not None and frame_skip: diff --git a/scenedetect/scene_manager.py b/scenedetect/scene_manager.py index 43bc46a9..23be9665 100644 --- a/scenedetect/scene_manager.py +++ b/scenedetect/scene_manager.py @@ -216,7 +216,9 @@ def write_scene_list( scene_list: SceneList, include_cut_list: bool = True, cut_list: Optional[CutList] = None, -) -> None: + col_separator: str = ",", + row_separator: str = "\n", +): """Writes the given list of scenes to an output file handle in CSV format. Arguments: @@ -227,8 +229,13 @@ def write_scene_list( cut_list: Optional list of FrameTimecode objects denoting the cut list (i.e. the frames in the video that need to be split to generate individual scenes). If not specified, the cut list is generated using the start times of each scene following the first one. + col_separator: Delimiter to use between values. Must be a single character. + row_separator: Line terminator to use between rows. + + Raises: + TypeError: If `col_separator` is not a 1-character string. """ - csv_writer = csv.writer(output_csv_file, lineterminator="\n") + csv_writer = csv.writer(output_csv_file, delimiter=col_separator, lineterminator=row_separator) # If required, output the cutting list as the first row (i.e. before the header row). 
if include_cut_list: csv_writer.writerow( diff --git a/tests/test_cli.py b/tests/test_cli.py index 2bcd2435..9a0f0339 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -294,15 +294,38 @@ def test_cli_list_scenes(tmp_path: Path): ) == 0 ) - # Add statsfile + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv") + assert os.path.exists(output_path) + EXPECTED_CSV_OUTPUT = """Timecode List:,00:00:03.754 +Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds) +1,49,00:00:02.002,2.002,90,00:00:03.754,3.754,42,00:00:01.752,1.752 +2,91,00:00:03.754,3.754,144,00:00:06.006,6.006,54,00:00:02.252,2.252 +""" + assert output_path.read_text() == EXPECTED_CSV_OUTPUT + + +def test_cli_list_scenes_skip_cuts(tmp_path: Path): + """Test `list-scenes` command with the -s/--skip-cuts option for RFC 4180 compliance.""" + # Regular invocation assert ( invoke_scenedetect( - "-i {VIDEO} -s {STATS} time {TIME} {DETECTOR} list-scenes", + "-i {VIDEO} time {TIME} {DETECTOR} list-scenes -s", output_dir=tmp_path, ) == 0 ) - # Suppress output file + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv") + assert os.path.exists(output_path) + EXPECTED_CSV_OUTPUT = """Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds) +1,49,00:00:02.002,2.002,90,00:00:03.754,3.754,42,00:00:01.752,1.752 +2,91,00:00:03.754,3.754,144,00:00:06.006,6.006,54,00:00:02.252,2.252 +""" + assert output_path.read_text() == EXPECTED_CSV_OUTPUT + + +def test_cli_list_scenes_no_output(tmp_path: Path): + """Test `list-scenes` command with the -n flag.""" + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv") assert ( invoke_scenedetect( "-i {VIDEO} time {TIME} {DETECTOR} list-scenes -n", @@ -310,8 +333,51 @@ def test_cli_list_scenes(tmp_path: Path): ) == 0 ) - # TODO: Check for output 
files from regular invocation. - # TODO: Delete scene list and ensure is not recreated using -n. + assert not os.path.exists(output_path) + + +def test_cli_list_scenes_custom_delimiter(tmp_path: Path): + """Test `list-scenes` command with custom delimiters set in a config file.""" + config_path = tmp_path.joinpath("config.cfg") + config_path.write_text(""" +[list-scenes] +col-separator = | +row-separator = \\t +""") + assert ( + invoke_scenedetect( + f"-i {{VIDEO}} -c {config_path} time {{TIME}} {{DETECTOR}} list-scenes", + output_dir=tmp_path, + ) + == 0 + ) + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv") + assert os.path.exists(output_path) + EXPECTED_CSV_OUTPUT = """Timecode List:,00:00:03.754 +Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds) +1,49,00:00:02.002,2.002,90,00:00:03.754,3.754,42,00:00:01.752,1.752 +2,91,00:00:03.754,3.754,144,00:00:06.006,6.006,54,00:00:02.252,2.252 +""" + EXPECTED_CSV_OUTPUT = EXPECTED_CSV_OUTPUT.replace(",", "|").replace("\n", "\t") + assert output_path.read_text() == EXPECTED_CSV_OUTPUT + + +def test_cli_list_scenes_rejects_multichar_col_separator(tmp_path: Path): + """Test `list-scenes` rejects a config file `col-separator` longer than one character.""" + config_path = tmp_path.joinpath("config.cfg") + config_path.write_text(""" +[list-scenes] +col-separator = || +""") + assert ( + invoke_scenedetect( + f"-i {{VIDEO}} -c {config_path} time {{TIME}} {{DETECTOR}} list-scenes", + output_dir=tmp_path, + ) + != 0 + ) + output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv") + assert not os.path.exists(output_path) @pytest.mark.skipif(condition=not is_ffmpeg_available(), reason="ffmpeg is not available") diff --git a/website/pages/changelog.md b/website/pages/changelog.md index 3fc83772..865fccf6 100644 --- a/website/pages/changelog.md +++ b/website/pages/changelog.md @@ -596,4 +596,6 @@ Development - 
[improvement] `save_to_csv` now works with paths from `pathlib` - [bugfix] Fix `SyntaxWarning` due to incorrect escaping [#400](https://github.com/Breakthrough/PySceneDetect/issues/400) - [bugfix] Fix `ContentDetector` crash when using callbacks [#416](https://github.com/Breakthrough/PySceneDetect/issues/416) [#420](https://github.com/Breakthrough/PySceneDetect/issues/420) - + - [api] The `save_to_csv` function now works correctly with paths from the `pathlib` module + - [api] Add `col_separator` and `row_separator` arguments to the `write_scene_list` function in `scenedetect.scene_manager` + - [feature] Add `col-separator` and `row-separator` options to the `[list-scenes]` config file section to set the CSV column/row separators [#423](https://github.com/Breakthrough/PySceneDetect/issues/423)