diff --git a/detection/component_util/mpf_component_util/__init__.py b/detection/component_util/mpf_component_util/__init__.py index 18e9f8c..7174337 100644 --- a/detection/component_util/mpf_component_util/__init__.py +++ b/detection/component_util/mpf_component_util/__init__.py @@ -40,5 +40,5 @@ from .http_retry import HttpRetry from .job_config import ( - TriggerMismatch, NoInBoundsSpeechSegments, TriggeredJobConfig, DynamicSpeechJobConfig, SpeakerInfo + NoInBoundsSpeechSegments, DynamicSpeechJobConfig, SpeakerInfo ) diff --git a/detection/component_util/mpf_component_util/job_config.py b/detection/component_util/mpf_component_util/job_config.py index 606cb29..8e44e57 100644 --- a/detection/component_util/mpf_component_util/job_config.py +++ b/detection/component_util/mpf_component_util/job_config.py @@ -26,34 +26,14 @@ from collections import defaultdict from pathlib import Path -from typing import Union, Optional, Mapping, List, Dict, Tuple, NamedTuple, Any +from typing import Dict, List, Mapping, NamedTuple, Optional, Tuple, Union import mpf_component_api as mpf import mpf_component_util as mpf_util - -MpfJob = Union[mpf.AudioJob, mpf.VideoJob, mpf.ImageJob, mpf.GenericJob] MpfSpeechJob = Union[mpf.AudioJob, mpf.VideoJob] -class TriggerMismatch(Exception): - """ Exception raised when the feed-forward track does not meet the trigger. - - :param trigger_key: The trigger key defined by TRIGGER property - :param expected: The expected value defined by TRIGGER property - :param trigger_val: The actual value of the trigger property (if present) - """ - def __init__(self, trigger_key: str, expected: str, trigger_val: Optional[str]=None): - self.trigger_key = trigger_key - self.expected = expected - self.trigger_val = trigger_val - - def __str__(self): - if self.trigger_val is None: - return f"Trigger property {self.trigger_key} not present in feed-forward detection properties" - return f"Expected {self.trigger_key} to be {self.expected}, got {self.trigger_val}" - - class NoInBoundsSpeechSegments(Exception): def __init__(self, n_early: int, n_late: int): self.n_early = n_early @@ -65,72 +45,26 @@ def __str__(self): f"job start time, {self.n_late} after job end time)." ) -class TriggeredJobConfig(object): - """ Handles job parsing logic - - :ivar job_name: Job name - :ivar target_file: File location of input data - :ivar is_triggered_job: Whether job contains a feed-forward track - """ - def __init__(self, job: MpfJob): - # General job data - self.job_name: str - self.target_file: Path - self.is_triggered_job: bool = False - self._add_job_data(job) - - if self.is_triggered_job: - self._check_trigger(job) - - # Job properties - self._add_job_properties(job.job_properties) - - def _add_job_data(self, job: MpfJob): - self.target_file = Path(job.data_uri) - self.job_name = job.job_name - if job.feed_forward_track is not None: - self.is_triggered_job = 'TRIGGER' in job.job_properties - - @staticmethod - def _check_trigger(job: MpfJob): - # Check TRIGGER is met - trigger = mpf_util.get_property( - properties=job.job_properties, - key='TRIGGER', - default_value='', - prop_type=str - ) - - t_key, t_val = trigger.strip().split('=') - - # If trigger key is in feed-forward properties, only run - # transcription if the value matches trigger val - if t_key not in job.feed_forward_track.detection_properties: - raise TriggerMismatch(t_key, t_val) - if job.feed_forward_track.detection_properties[t_key] != t_val: - raise TriggerMismatch(t_key, t_val, job.feed_forward_track.detection_properties[t_key]) - - def _add_job_properties(self, job_properties: Mapping[str, str]): - raise NotImplementedError() - class SpeakerInfo(NamedTuple): speaker_id: str - gender: str - gender_score: float language: str language_scores: Dict[str, float] - speech_segs: List[Any] + speech_segs: List[Tuple[int, int]] + gender: Optional[str] = None + gender_score: Optional[float] = None -class DynamicSpeechJobConfig(TriggeredJobConfig): +class DynamicSpeechJobConfig: """ Handles job parsing logic for components that may be part of a dynamic speech pipeline + :ivar job_name: Job name + :ivar target_file: File location of input data :ivar start_time: Start time of the audio (in milliseconds) :ivar stop_time: Stop time of the audio (in milliseconds) :ivar fps: Frames per second for video jobs - :ivar speaker_id_prefix: Prefix for LONG_SPEAKER_ID + :ivar speaker_id_prefix: Prefix for SPEAKER_ID :ivar override_default_language: A default ISO 639-3 language to use when languages defined in the speaker are not supported. If the feed-forward track does not exist, this is None @@ -138,21 +72,29 @@ class DynamicSpeechJobConfig(TriggeredJobConfig): if feed-forward track exists, otherwise None """ def __init__(self, job: MpfSpeechJob): + self.job_name = job.job_name + self.target_file = Path(job.data_uri) self.start_time: int self.stop_time: Optional[int] = None self.fps: Optional[float] = None self.speaker_id_prefix: str - super().__init__(job) + + self._add_media_info(job) + self._add_job_properties(job.job_properties) # Properties related to dynamic speech pipelines self.speaker: Optional[SpeakerInfo] = None self.override_default_language: Optional[str] = None - if self.is_triggered_job: + if (job.feed_forward_track and + 'VOICED_SEGMENTS' in job.feed_forward_track.detection_properties): self._add_feed_forward_properties(job) - def _add_job_data(self, job: MpfSpeechJob): - super()._add_job_data(job) + def _add_job_properties(self, job_properties: Mapping[str, str]): + raise NotImplementedError() + + + def _add_media_info(self, job: MpfSpeechJob): media_duration = float(job.media_properties.get('DURATION', -1)) if isinstance(job, mpf.VideoJob): start_frame = job.start_frame @@ -192,11 +134,12 @@ def _add_job_data(self, job: MpfSpeechJob): self.stop_time = None self.speaker_id_prefix = f"{self.start_time}-{self.stop_time or 'EOF'}-" + def _add_feed_forward_properties(self, job: MpfSpeechJob): feed_forward_properties = job.feed_forward_track.detection_properties speaker_id = mpf_util.get_property( properties=feed_forward_properties, - key='LONG_SPEAKER_ID', + key='SPEAKER_ID', default_value='0-EOF-1', prop_type=str ) @@ -204,14 +147,14 @@ def _add_feed_forward_properties(self, job: MpfSpeechJob): gender = mpf_util.get_property( properties=feed_forward_properties, key='GENDER', - default_value='', + default_value=None, prop_type=str ) gender_score = mpf_util.get_property( properties=feed_forward_properties, key='GENDER_CONFIDENCE', - default_value=-1.0, + default_value=None, prop_type=float ) @@ -268,20 +211,18 @@ def _add_feed_forward_properties(self, job: MpfSpeechJob): speaker_id=speaker_id, language=language, language_scores=language_scores, + speech_segs=speech_segs, gender=gender, - gender_score=gender_score, - speech_segs=speech_segs + gender_score=gender_score ) - def _add_job_properties(self, job_properties: Mapping[str, str]): - raise NotImplementedError() @staticmethod def _parse_segments_str( segments_string: str, - media_start_time: Optional[int] = 0, + media_start_time: int = 0, media_stop_time: Optional[int] = None - ) -> Optional[List[Tuple[int, int]]]: + ) -> List[Tuple[int, int]]: """ Converts a string of the form 'start_1-stop_1, start_2-stop_2, ..., start_n-stop_n' where start_x and stop_x are in milliseconds, to a list of int pairs diff --git a/detection/examples/PythonOcvComponent/ocv_component/ocv_component.py b/detection/examples/PythonOcvComponent/ocv_component/ocv_component.py index 61b8eda..98288be 100644 --- a/detection/examples/PythonOcvComponent/ocv_component/ocv_component.py +++ b/detection/examples/PythonOcvComponent/ocv_component/ocv_component.py @@ -35,8 +35,6 @@ logger = logging.getLogger('OcvComponent') class OcvComponent(mpf_util.ImageReaderMixin, mpf_util.VideoCaptureMixin): - detection_type = 'TEST OCV DETECTION TYPE' - def get_detections_from_image_reader( self, diff --git a/detection/examples/PythonOcvComponent/plugin-files/descriptor/descriptor.json b/detection/examples/PythonOcvComponent/plugin-files/descriptor/descriptor.json index eec0b62..924ed04 100644 --- a/detection/examples/PythonOcvComponent/plugin-files/descriptor/descriptor.json +++ b/detection/examples/PythonOcvComponent/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "PYTHONOCVTEST", "description": "A dummy Python detection component.", "actionType": "DETECTION", + "trackType": "TEST OCV TRACK TYPE", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/detection/examples/PythonTestComponent/descriptor/descriptor.json b/detection/examples/PythonTestComponent/descriptor/descriptor.json index 7335ac3..da73d8e 100644 --- a/detection/examples/PythonTestComponent/descriptor/descriptor.json +++ b/detection/examples/PythonTestComponent/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "PYTHONTEST", "description": "A dummy Python detection component.", "actionType": "DETECTION", + "trackType": "TEST TRACK TYPE", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/detection/examples/PythonTestComponent/test_component.py b/detection/examples/PythonTestComponent/test_component.py index aad50b5..56d97a3 100644 --- a/detection/examples/PythonTestComponent/test_component.py +++ b/detection/examples/PythonTestComponent/test_component.py @@ -33,7 +33,6 @@ class TestComponent(object): - detection_type = 'TEST DETECTION TYPE' def __init__(self): logger.info('Creating instance of TestComponent')