From e252c914704ebb17bbce992be784b8b3fd39d61c Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Tue, 28 Oct 2025 09:39:49 +0100 Subject: [PATCH 01/16] squash feta/to_cdf --- kloppy/_providers/cdf.py | 0 kloppy/domain/models/common.py | 42 ++ kloppy/domain/models/position.py | 8 + kloppy/domain/models/tracking.py | 7 +- .../serializers/tracking/cdf/__init__.py | 3 + .../serializers/tracking/cdf/serializer.py | 459 ++++++++++++++++++ .../infra/serializers/tracking/serializer.py | 17 + kloppy/tests/test_cdf.py | 66 +++ kloppy/tests/test_statsbomb.py | 14 + setup.py | 1 + 10 files changed, 616 insertions(+), 1 deletion(-) create mode 100644 kloppy/_providers/cdf.py create mode 100644 kloppy/infra/serializers/tracking/cdf/__init__.py create mode 100644 kloppy/infra/serializers/tracking/cdf/serializer.py create mode 100644 kloppy/infra/serializers/tracking/serializer.py create mode 100644 kloppy/tests/test_cdf.py diff --git a/kloppy/_providers/cdf.py b/kloppy/_providers/cdf.py new file mode 100644 index 000000000..e69de29bb diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index e0ab88ce1..caf7c868e 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -116,6 +116,7 @@ class Provider(Enum): DATAFACTORY (Provider): STATSPERFORM (Provider): SPORTVU (Provider): + CDF (Provider): OTHER (Provider): """ @@ -134,6 +135,7 @@ class Provider(Enum): HAWKEYE = "hawkeye" SPORTVU = "sportvu" SIGNALITY = "signality" + CDF = "common_data_format" OTHER = "other" def __str__(self): @@ -1183,6 +1185,45 @@ def pitch_dimensions(self) -> PitchDimensions: pitch_width=None, standardized=False, ) + + +class CDFCoordinateSystem(ProviderCoordinateSystem): + """ + CDFCoordinateSystem coordinate system. + + Uses a pitch with the origin at the center and the y-axis oriented + from bottom to top. The coordinates are in meters. + """ + + @property + def provider(self) -> Provider: + return Provider.CDF + + @property + def origin(self) -> Origin: + return Origin.CENTER + + @property + def vertical_orientation(self) -> VerticalOrientation: + return VerticalOrientation.BOTTOM_TO_TOP + + @property + def pitch_dimensions(self) -> PitchDimensions: + return NormalizedPitchDimensions( + x_dim=Dimension( + -1 * self._pitch_length / 2, self._pitch_length / 2 + ), + y_dim=Dimension( + -1 * self._pitch_width / 2, self._pitch_width / 2 + ), + pitch_length = self._pitch_length, + pitch_width=self._pitch_width, + standardized=False, + ) + + def __init__(self, base_coordinate_system: ProviderCoordinateSystem): + self._pitch_length = base_coordinate_system.pitch_dimensions.pitch_length + self._pitch_width = base_coordinate_system.pitch_dimensions.pitch_width class SignalityCoordinateSystem(ProviderCoordinateSystem): @@ -1390,6 +1431,7 @@ def build_coordinate_system( Provider.HAWKEYE: HawkEyeCoordinateSystem, Provider.SPORTVU: SportVUCoordinateSystem, Provider.SIGNALITY: SignalityCoordinateSystem, + Provider.CDF: CDFCoordinateSystem, } if provider in coordinate_systems: diff --git a/kloppy/domain/models/position.py b/kloppy/domain/models/position.py index 84d082800..328a2fb7a 100644 --- a/kloppy/domain/models/position.py +++ b/kloppy/domain/models/position.py @@ -78,6 +78,14 @@ def parent(self): return PositionType[self._parent] return None + @property + def position_group(self): + current = self + while current.parent is not None: + current = current.parent + + return current + def is_subtype_of(self, other): current = self while current is not None: diff --git a/kloppy/domain/models/tracking.py b/kloppy/domain/models/tracking.py index df60be76f..a34cb7405 100644 --- a/kloppy/domain/models/tracking.py +++ b/kloppy/domain/models/tracking.py @@ -83,6 +83,7 @@ def frame_rate(self): @deprecated( "to_pandas will be removed in the future. Please use to_df instead." ) + def to_pandas( self, record_converter: Optional[Callable[[Frame], Dict]] = None, @@ -118,6 +119,10 @@ def generic_record_converter(frame: Frame): return pd.DataFrame.from_records( map(generic_record_converter, self.records) ) - + + @property + def to_common_data_format(self)->[object]: + + return [] __all__ = ["Frame", "TrackingDataset", "PlayerData"] diff --git a/kloppy/infra/serializers/tracking/cdf/__init__.py b/kloppy/infra/serializers/tracking/cdf/__init__.py new file mode 100644 index 000000000..21944af32 --- /dev/null +++ b/kloppy/infra/serializers/tracking/cdf/__init__.py @@ -0,0 +1,3 @@ +from kloppy.domain.models.common import CDFCoordinateSystem + +__all__ = ["CDFCoordinateSystem"] \ No newline at end of file diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py new file mode 100644 index 000000000..99648067d --- /dev/null +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -0,0 +1,459 @@ +import json +import tempfile +from typing import IO, NamedTuple + +from kloppy.domain import Provider, TrackingDataset, PositionType +from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer + + +class CDFOutputs(NamedTuple): + meta_data: IO[bytes] + tracking_data: list[IO[bytes]] + + +class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): + provider = Provider.CDF + + # to infer the starting formation if not given + @staticmethod + def get_starting_formation(team_players) -> str: + """ + determine the starting formation if not define. + + Args: + team: The team on which we want to infer the formation. + + Returns: + formation: the infered formation. + """ + formation = "" + default_formation = "4-3-3" + defender = midfielder = attacker = 0 + for player in team_players: + if player.starting_position.position_group == None: + continue + elif ( + player.starting_position.position_group + == PositionType.Attacker + ): + attacker += 1 + elif ( + player.starting_position.position_group + == PositionType.Midfielder + ): + midfielder += 1 + elif ( + player.starting_position.position_group + == PositionType.Defender + ): + defender += 1 + if defender + midfielder + attacker == 10: + formation = f"{defender}-{midfielder}-{attacker}" + elif defender + midfielder + attacker != 10: + formation = default_formation + return formation + + def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: + """ + Serialize a TrackingDataset to Common Data Format. + + Args: + dataset: The tracking dataset to serialize + outputs: CDFOutputs containing file handles for metadata and tracking data + + Returns: + bool: True if serialization was successful, False otherwise + """ + + from kloppy.domain import ( + Orientation, + BallState, + ) + + # builded coordinateSystem class. + from kloppy.domain.models.common import CDFCoordinateSystem + + # setting it as coordinate system of the imported data + dataset = dataset.transform( + to_coordinate_system=CDFCoordinateSystem( + dataset.metadata.coordinate_system + ), + to_orientation=Orientation.STATIC_HOME_AWAY, + ) + + ## building Tracking jsonl + # list of different periods within a game define by the cdf + periods = { + 1: "first_half", + 2: "second_half", + 3: "first_half_extratime", + 4: "second_half_extratime", + 5: "shootout", + } + + # container for start and end frame_id + period_start_frame_id = { + period.id: None for period in dataset.metadata.periods + } + period_end_frame_id = { + period.id: None for period in dataset.metadata.periods + } + + # container for start and end normalized frame_id + normalized_period_start_frame_id = { + period.id: None for period in dataset.metadata.periods + } + normalized_period_end_frame_id = { + period.id: None for period in dataset.metadata.periods + } + + # diffence of ids between frame_ids + period_offset = {period.id: 0 for period in dataset.metadata.periods} + + # Get home and away team data + home_team, away_team = dataset.metadata.teams + + # Get the players Id. + home_player_ids, away_player_ids = ( + [player.player_id for player in home_team.players], + [player.player_id for player in away_team.players], + ) + + frame_id = 0 # Use for the cdf_frame_ids.. + for frame in dataset.frames: + frame_data = {} + # Frame ID specified by the CDF + frame_data["frame_id"] = frame_id + # Original frame_id + frame_data["original_frame_id"] = frame.frame_id + # Timestamp + frame_data["timestamp"] = str( + dataset.metadata.date + frame.timestamp + ) + # Period + frame_data["period"] = periods.get(frame.period.id, "unknownn") + period_id = frame.period.id + # Update the start and end id for this period + if period_start_frame_id[period_id] is None: + period_start_frame_id[period_id] = frame_data[ + "original_frame_id" + ] + + if ( + period_id > 1 + and period_end_frame_id[period_id - 1] is not None + ): + prev_period_length = ( + period_end_frame_id[period_id - 1] + - period_start_frame_id[period_id - 1] + + 1 + ) + period_offset[period_id] = ( + period_offset[period_id - 1] + prev_period_length + ) + + # Set normalized start frame id + normalized_period_start_frame_id[period_id] = period_offset[ + period_id + ] + + period_end_frame_id[period_id] = frame_data["original_frame_id"] + + normalized_frame_id = ( + frame_data["original_frame_id"] + - period_start_frame_id[period_id] + ) + period_offset[period_id] + + # Update normalized end frame id + normalized_period_end_frame_id[period_id] = normalized_frame_id + + # Match ID + frame_data["match"] = {"id": str(dataset.metadata.game_id)} + # Ball status + frame_data["ball_status"] = frame.ball_state == BallState.ALIVE + + # Teams and players + home_players = [] + for player, coordinates in frame.players_coordinates.items(): + if player.player_id in home_player_ids: + try: + x = coordinates.x + y = coordinates.x + home_players.append( + { + "id": player.player_id, + "x": round(x, 3), + "y": round(y, 3), + "position": player.starting_position.code, + } + ) + except KeyError: + continue + + away_players = [] + for player, coordinates in frame.players_coordinates.items(): + if player.player_id in away_player_ids: + try: + x = coordinates.x + y = coordinates.x + away_players.append( + { + "id": player.player_id, + "x": round(x, 3), + "y": round(y, 3), + "position": player.starting_position.code, + } + ) + except KeyError: + continue + + # teams within the tracking data. + + home_players_id = [] + away_players_id = [] + for player, _ in frame.players_coordinates.items(): + if player.team == home_team: + home_players_id.append(player.player_id) + if player.team == away_team: + away_players_id.append(player.player_id) + set_of_home_players_id_in_the_frame = set(home_players_id) + set_of_away_players_id_in_the_frame = set(away_players_id) + + frame_data["teams"] = { + "home": { + "id": home_team.team_id, + "players": home_players, + "jersey_color": " ", # + "name": home_team.name, + "formation": ( + home_team.formations.at_start() + if home_team.formations.items + else self.get_starting_formation( + [ + p + for p in home_team.players + if p.player_id + in set_of_home_players_id_in_the_frame + ] + ) + ), + }, + "away": { + "id": away_team.team_id, + "players": away_players, + "jersey_color": " ", + "name": away_team.name, + "formation": ( + away_team.formations.at_start() + if away_team.formations.items + else self.get_starting_formation( + [ + p + for p in away_team.players + if p.player_id + in set_of_away_players_id_in_the_frame + ] + ) + ), + }, + } + + # Ball + if ( + frame_data["ball_status"] == True + and frame.ball_coordinates is not None + ): + try: + ball_x = round(frame.ball_coordinates.x, 3) + ball_y = round(frame.ball_coordinates.y, 3) + ball_z = round(frame.ball_coordinates.z, 3) + except KeyError: + ball_x = ball_y = ball_z = None + else: + ball_x = ( + ball_y + ) = ball_z = 404 # default missing value for ball coordinates + + frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} + + # update the frame_id + frame_id += 1 + + # build a temporary jsonl for each frame + frame_file = tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) + frame_file.write((json.dumps(frame_data) + "\n").encode("utf-8")) + frame_file.flush() # make sure data is written + + # Add to tracking list + outputs.tracking_data.append(frame_file) + + ###################### build now the metadata. + # Output containers + metadata_json = {} + # Competition infos. + metadata_json["competition"] = { + "id": "MISSING_MANDATORY_COMPETITION_ID", + "name": "", + "format": "", + "age_restriction": "", + "type": "", + } + + # season infos. + metadata_json["season"] = { + "id": "MISSING_MANDATORY_SEASON_ID", + "name": "", + } + + # match infos. + periods_info = [] + for period in dataset.metadata.periods: + curent_period = { + "period": periods[period.id], + "play_direction": "left_right", + "start_time": str( + dataset.metadata.date + period.start_timestamp + ), + "end_time": str(dataset.metadata.date + period.end_timestamp), + "start_frame_id": normalized_period_start_frame_id[period.id], + "end_frame_id": normalized_period_end_frame_id[period.id], + "left_team_id": home_team.team_id, + "right_team_id": away_team.team_id, + } + periods_info.append(curent_period) + + ## building team_players for metadata + meta_home_players = [] + starters_ids = [] + for player, coordinates in dataset[0].players_coordinates.items(): + starters_ids.append(player.player_id) + + for player in home_team.players: + try: + meta_home_players.append( + { + "id": player.player_id, + "team_id": home_team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters_ids, + } + ) + except KeyError: + continue + + meta_away_players = [] + for player in away_team.players: + try: + meta_away_players.append( + { + "id": player.player_id, + "team_id": away_team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters_ids, + } + ) + except KeyError: + continue + + # get whistles related to period directly from them. + whistles = [] + for period in periods_info: + whistle_start = {} + whistle_end = {} + # type + whistle_start["type"] = period["period"] + whistle_end["type"] = period["period"] + # sub_type + whistle_start["sub_type"] = "start" + whistle_end["sub_type"] = "end" + # time + whistle_start["time"] = period["start_time"] + whistle_end["time"] = period["end_time"] + whistles.append(whistle_start) + whistles.append(whistle_end) + + metadata_json["match"] = { + "id": str(dataset.metadata.game_id), + "kickoff_time": str( + dataset.metadata.date + + dataset.metadata.periods[0].start_timestamp + ), + "periods": periods_info, + "whistles": whistles, + "round": "", + "scheduled_kickoff_time": str(dataset.metadata.date), + "local_kickoff_time": "", + "misc": { + "country": "", + "city": "", + "percipitation": 0, + "is_open_roof": True, # Asume as default value + }, + } + + home_players_id_in_meta = [] + away_players_id_in_meta = [] + for player, _ in dataset[0].players_coordinates.items(): + if player.team == home_team: + home_players_id_in_meta.append(player.player_id) + if player.team == away_team: + away_players_id_in_meta.append(player.player_id) + meta_set_of_home_players_id_in_the_frame = set(home_players_id_in_meta) + meta_set_of_away_players_id_in_the_frame = set(away_players_id_in_meta) + + metadata_json["teams"] = { + "home": { + "id": home_team.team_id, + "players": meta_home_players, + "jersey_color": " ", + "name": home_team.name, + "formation": home_team.starting_formation + or self.get_starting_formation( + [ + p + for p in home_team.players + if p.player_id + in meta_set_of_home_players_id_in_the_frame + ] + ), + }, + "away": { + "id": away_team.team_id, + "players": meta_away_players, + "jersey_color": " ", + "name": away_team.name, + "formation": away_team.starting_formation + or self.get_starting_formation( + [ + p + for p in away_team.players + if p.player_id + in meta_set_of_away_players_id_in_the_frame + ] + ), + }, + } + + metadata_json["stadium"] = { + "id": "MISSING_MANDATORY_STADIUM_ID", + "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, + "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, + "name": "", + "turf": "", + } + + metadata_json["meta"] = { + "video": None, + "tracking": None, + "landmarks": None, + "meta": None, + "cdf": None, + } + + outputs.meta_data.write( + (json.dumps(metadata_json) + "\n").encode("utf-8") + ) + + return True diff --git a/kloppy/infra/serializers/tracking/serializer.py b/kloppy/infra/serializers/tracking/serializer.py new file mode 100644 index 000000000..a7bc72e4c --- /dev/null +++ b/kloppy/infra/serializers/tracking/serializer.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod +from typing import Generic, TypeVar + +from kloppy.domain import Provider, TrackingDataset + +T = TypeVar("T") + + +class TrackingDataSerializer(ABC, Generic[T]): + @property + @abstractmethod + def provider(self) -> Provider: + raise NotImplementedError + + @abstractmethod + def serialize(self, dataset: TrackingDataset, outputs: T) -> bool: + raise NotImplementedError diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py new file mode 100644 index 000000000..67d07cec4 --- /dev/null +++ b/kloppy/tests/test_cdf.py @@ -0,0 +1,66 @@ +import tempfile +from pathlib import Path + +import pytest +import cdf +from cdf import VERSION + +from kloppy import sportec +from kloppy.domain import TrackingDataset +from kloppy.infra.serializers.tracking.cdf.serializer import ( + CDFTrackingDataSerializer, + CDFOutputs, +) + +class TestCDFSerializer: + @pytest.fixture + def raw_data(self, base_dir) -> Path: + return base_dir / "files/sportec_positional.xml" + + @pytest.fixture + def meta_data(self, base_dir) -> Path: + return base_dir / "files/sportec_meta.xml" + + @pytest.fixture + def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: + """Load a small Sportec tracking data snippet for testing CDF serialization.""" + return sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + limit=None, + only_alive=False, + ) + + def test_produces_valid_cdf_output(self, dataset): + """Test that CDFTrackingDataSerializer produces valid CDF output.""" + serializer = CDFTrackingDataSerializer() + + # Instantiate Validators + meta_validator = cdf.MetaSchemaValidator(schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json") + tracking_validator = cdf.TrackingSchemaValidator(schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json") + + with tempfile.NamedTemporaryFile(mode="w+b", suffix=".json", delete=False) as meta_file: + # Initialize empty list for tracking files + tracking_files: list[tempfile._TemporaryFileWrapper] = [] + # Instantiate the named tuple for outputs + outputs = CDFOutputs( + meta_data=meta_file, + tracking_data=tracking_files + ) + # Serialize the dataset + success = serializer.serialize(dataset, outputs) + assert success is True + # Save paths for validation after leaving the block + meta_path = meta_file.name + tracking_paths = [f.name for f in outputs.tracking_data] + + # Validate metadata + meta_validator.validate_schema(sample=meta_path) + # Validate all tracking frame files + for path in tracking_paths: + tracking_validator.validate_schema(sample=path) + + Path(meta_path).unlink() + for path in tracking_paths: + Path(path).unlink() \ No newline at end of file diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index c2d2e9a14..037133bd7 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -184,6 +184,20 @@ def test_player_position(self, dataset): ) assert away_starting_gk.player_id == "5205" # Rui Patricio + assert ( + PositionType.Goalkeeper.position_group == PositionType.Goalkeeper + ) + assert ( + PositionType.CenterDefensiveMidfield.position_group + == PositionType.Midfielder + ) + assert ( + PositionType.AttackingMidfield.position_group + == PositionType.Midfielder + ) + assert PositionType.CenterBack.position_group == PositionType.Defender + assert PositionType.Striker.position_group == PositionType.Attacker + def test_periods(self, dataset): """It should create the periods""" assert len(dataset.metadata.periods) == 2 diff --git a/setup.py b/setup.py index c8d7757cd..7b74060c1 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ def setup_package(): "flask", "flask-cors", "pytest-httpserver", + "common-data-format-validator @ git+https://github.com/koenvo/common-data-format-validator.git@bugfix/packaging", ], "development": ["pre-commit==2.6.0"], "query": ["networkx>=2.4,<3"], From 8eaafbb748c2c88345e124c378476ff14b4b718b Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Thu, 30 Oct 2025 14:44:17 +0100 Subject: [PATCH 02/16] cleaned up CDF Serializer --- kloppy/domain/models/common.py | 68 +- kloppy/domain/models/tracking.py | 83 ++- .../serializers/tracking/cdf/__init__.py | 3 +- .../infra/serializers/tracking/cdf/helpers.py | 201 ++++++ .../serializers/tracking/cdf/serializer.py | 662 +++++++----------- kloppy/tests/test_cdf.py | 255 ++++++- setup.py | 2 +- 7 files changed, 818 insertions(+), 456 deletions(-) create mode 100644 kloppy/infra/serializers/tracking/cdf/helpers.py diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 001e036af..cf0a7f7fd 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -688,12 +688,16 @@ def to_mplsoccer(self): dim = BaseDims( left=self.pitch_dimensions.x_dim.min, right=self.pitch_dimensions.x_dim.max, - bottom=self.pitch_dimensions.y_dim.min - if not invert_y - else self.pitch_dimensions.y_dim.max, - top=self.pitch_dimensions.y_dim.max - if not invert_y - else self.pitch_dimensions.y_dim.min, + bottom=( + self.pitch_dimensions.y_dim.min + if not invert_y + else self.pitch_dimensions.y_dim.max + ), + top=( + self.pitch_dimensions.y_dim.max + if not invert_y + else self.pitch_dimensions.y_dim.min + ), width=self.pitch_dimensions.x_dim.max - self.pitch_dimensions.x_dim.min, length=self.pitch_dimensions.y_dim.max @@ -742,14 +746,16 @@ def to_mplsoccer(self): - self.pitch_dimensions.x_dim.min ), pad_multiplier=1, - aspect_equal=False - if self.pitch_dimensions.unit == Unit.NORMED - else True, + aspect_equal=( + False if self.pitch_dimensions.unit == Unit.NORMED else True + ), pitch_width=pitch_width, pitch_length=pitch_length, - aspect=pitch_width / pitch_length - if self.pitch_dimensions.unit == Unit.NORMED - else 1.0, + aspect=( + pitch_width / pitch_length + if self.pitch_dimensions.unit == Unit.NORMED + else 1.0 + ), ) return dim @@ -1191,7 +1197,7 @@ def pitch_dimensions(self) -> PitchDimensions: pitch_width=None, standardized=False, ) - + class CDFCoordinateSystem(ProviderCoordinateSystem): """ @@ -1215,20 +1221,20 @@ def vertical_orientation(self) -> VerticalOrientation: @property def pitch_dimensions(self) -> PitchDimensions: - return NormalizedPitchDimensions( - x_dim=Dimension( - -1 * self._pitch_length / 2, self._pitch_length / 2 - ), - y_dim=Dimension( - -1 * self._pitch_width / 2, self._pitch_width / 2 - ), - pitch_length = self._pitch_length, - pitch_width=self._pitch_width, - standardized=False, - ) - + return NormalizedPitchDimensions( + x_dim=Dimension( + -1 * self._pitch_length / 2, self._pitch_length / 2 + ), + y_dim=Dimension(-1 * self._pitch_width / 2, self._pitch_width / 2), + pitch_length=self._pitch_length, + pitch_width=self._pitch_width, + standardized=False, + ) + def __init__(self, base_coordinate_system: ProviderCoordinateSystem): - self._pitch_length = base_coordinate_system.pitch_dimensions.pitch_length + self._pitch_length = ( + base_coordinate_system.pitch_dimensions.pitch_length + ) self._pitch_width = base_coordinate_system.pitch_dimensions.pitch_width @@ -1869,8 +1875,7 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[True] = True, **named_columns: NamedColumns, - ) -> List[Dict[str, Any]]: - ... + ) -> List[Dict[str, Any]]: ... @overload def to_records( @@ -1878,8 +1883,7 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[False] = False, **named_columns: NamedColumns, - ) -> Iterable[Dict[str, Any]]: - ... + ) -> Iterable[Dict[str, Any]]: ... def to_records( self, @@ -1997,6 +2001,10 @@ def to_df( else: raise KloppyParameterError(f"Engine {engine} is not valid") + def to_cdf(self): + if self.dataset_type != DatasetType.TRACKING: + raise ValueError(f"to_cdf() is only supported for TrackingDataset") + def __repr__(self): return f"<{self.__class__.__name__} record_count={len(self.records)}>" diff --git a/kloppy/domain/models/tracking.py b/kloppy/domain/models/tracking.py index a34cb7405..106882581 100644 --- a/kloppy/domain/models/tracking.py +++ b/kloppy/domain/models/tracking.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, Optional, Union, TYPE_CHECKING from kloppy.domain.models.common import DatasetType from kloppy.utils import ( @@ -7,6 +7,9 @@ docstring_inherit_attributes, ) +if TYPE_CHECKING: + from kloppy.io import FileLike, open_as_file + from .common import DataRecord, Dataset, Player from .pitch import Point, Point3D @@ -83,7 +86,6 @@ def frame_rate(self): @deprecated( "to_pandas will be removed in the future. Please use to_df instead." ) - def to_pandas( self, record_converter: Optional[Callable[[Frame], Dict]] = None, @@ -119,10 +121,77 @@ def generic_record_converter(frame: Frame): return pd.DataFrame.from_records( map(generic_record_converter, self.records) ) - - @property - def to_common_data_format(self)->[object]: - - return [] + + # Update the to_cdf method in Dataset class + def to_cdf( + self, + metadata_output_file: "FileLike", + tracking_output_file: "FileLike", + additional_metadata: Optional[Union[dict, "CdfMetaDataSchema"]] = None, + ) -> None: + """ + Export dataset to Common Data Format (CDF). + + Args: + metadata_output_file: File path or file-like object for metadata JSON output. + Must have .json extension if a string path. + tracking_output_file: File path or file-like object for tracking JSONL output. + Must have .jsonl extension if a string path. + additional_metadata: Additional metadata to include in the CDF output. + Can be a complete CdfMetaDataSchema TypedDict or a partial dict. + Supported top-level keys: 'competition', 'season', 'stadium', 'meta', 'match'. + Supports nested updates like {'stadium': {'id': '123'}}. + + Raises: + KloppyError: If the dataset is not a TrackingDataset. + ValueError: If file extensions are invalid. + + Examples: + >>> # Export to local files + >>> dataset.to_cdf( + ... metadata_output_file='metadata.json', + ... tracking_output_file='tracking.jsonl' + ... ) + + >>> # Export to S3 + >>> dataset.to_cdf( + ... metadata_output_file='s3://bucket/metadata.json', + ... tracking_output_file='s3://bucket/tracking.jsonl' + ... ) + + >>> # Export with partial metadata updates + >>> dataset.to_cdf( + ... metadata_output_file='metadata.json', + ... tracking_output_file='tracking.jsonl', + ... additional_metadata={ + ... 'competition': {'id': '123'}, + ... 'season': {'id': '2024'}, + ... 'stadium': {'id': '456', 'name': 'Stadium Name'} + ... } + ... ) + """ + from kloppy.domain import DatasetType + from kloppy.exceptions import KloppyError + from kloppy.infra.serializers.tracking.cdf import ( + CDFTrackingDataSerializer, + CDFOutputs, + ) + + serializer = CDFTrackingDataSerializer() + + # TODO: write files but also support non-local files, similar to how open_as_file supports non-local files + + # with write_as_file(metadata_output_file) as metadata_fp, \ + # write_as_file(tracking_output_file) as tracking_fp: + + # serializer.serialize( + # dataset=self, + # outputs=CDFOutputs( + # meta_data=metadata_fp, + # tracking_data=tracking_fp + # ), + # additional_metadata=additional_metadata + # ) + __all__ = ["Frame", "TrackingDataset", "PlayerData"] diff --git a/kloppy/infra/serializers/tracking/cdf/__init__.py b/kloppy/infra/serializers/tracking/cdf/__init__.py index 21944af32..633a69ce8 100644 --- a/kloppy/infra/serializers/tracking/cdf/__init__.py +++ b/kloppy/infra/serializers/tracking/cdf/__init__.py @@ -1,3 +1,4 @@ from kloppy.domain.models.common import CDFCoordinateSystem +from .serializer import CDFTrackingDataSerializer, CDFOutputs -__all__ = ["CDFCoordinateSystem"] \ No newline at end of file +__all__ = ["CDFCoordinateSystem", "CDFTrackingDataSerializer", "CDFOutputs"] diff --git a/kloppy/infra/serializers/tracking/cdf/helpers.py b/kloppy/infra/serializers/tracking/cdf/helpers.py new file mode 100644 index 000000000..439bb2659 --- /dev/null +++ b/kloppy/infra/serializers/tracking/cdf/helpers.py @@ -0,0 +1,201 @@ +from kloppy.domain import PositionType, Ground + +PERIODS_MAP = { + 1: "first_half", + 2: "second_half", + 3: "first_half_extratime", + 4: "second_half_extratime", + 5: "shootout", +} + + +def extract_team_players(team): + """Extract player IDs from a team.""" + return [player.player_id for player in team.players] + + +def get_player_coordinates(frame, ground: Ground): + """Create player data list for a team from frame coordinates.""" + players = [] + for player, coordinates in frame.players_coordinates.items(): + if player.team.ground == ground: + players.append( + { + "id": player.player_id, + "x": round(coordinates.x, 3), + "y": round(coordinates.y, 3), + "position": player.starting_position.code, + } + ) + return players + + +def get_ball_coordinates(frame): + if frame.ball_coordinates is not None: + return { + "x": round(frame.ball_coordinates.x, 3), + "y": round(frame.ball_coordinates.y, 3), + "z": round(frame.ball_coordinates.z, 3), + } + + # TODO: set to None after new CDF validator update + return {"x": None, "y": None, "z": None} + + +def initialize_period_tracking(periods): + """Initialize tracking dictionaries for all periods.""" + period_ids = [period.id for period in periods] + return { + "start_frame_id": {pid: None for pid in period_ids}, + "end_frame_id": {pid: None for pid in period_ids}, + "normalized_start_frame_id": {pid: None for pid in period_ids}, + "normalized_end_frame_id": {pid: None for pid in period_ids}, + "offset": {pid: 0 for pid in period_ids}, + } + + +def update_period_tracking(period_tracking, period_id, original_frame_id): + """Update period tracking information for the current frame.""" + if period_tracking["start_frame_id"][period_id] is None: + period_tracking["start_frame_id"][period_id] = original_frame_id + + if ( + period_id > 1 + and period_tracking["end_frame_id"][period_id - 1] is not None + ): + prev_period_length = ( + period_tracking["end_frame_id"][period_id - 1] + - period_tracking["start_frame_id"][period_id - 1] + + 1 + ) + period_tracking["offset"][period_id] = ( + period_tracking["offset"][period_id - 1] + prev_period_length + ) + + period_tracking["normalized_start_frame_id"][period_id] = ( + period_tracking["offset"][period_id] + ) + + period_tracking["end_frame_id"][period_id] = original_frame_id + + normalized_frame_id = ( + original_frame_id - period_tracking["start_frame_id"][period_id] + ) + period_tracking["offset"][period_id] + + period_tracking["normalized_end_frame_id"][period_id] = normalized_frame_id + + return normalized_frame_id + + +def get_starting_formation(team_players) -> str: + """ + determine the starting formation if not define. + + Args: + team: The team on which we want to infer the formation. + + Returns: + formation: the infered formation. + """ + default_formation = "4-3-3" + + defender = midfielder = attacker = 0 + for player in team_players: + if player.starting_position.position_group == None: + continue + elif player.starting_position.position_group == PositionType.Attacker: + attacker += 1 + elif ( + player.starting_position.position_group == PositionType.Midfielder + ): + midfielder += 1 + elif player.starting_position.position_group == PositionType.Defender: + defender += 1 + if defender + midfielder + attacker == 10: + return f"{defender}-{midfielder}-{attacker}" + elif defender + midfielder + attacker != 10: + return default_formation + return default_formation + + +def build_periods_info(dataset, period_tracking, home_team, away_team): + """Build period information for metadata.""" + periods_info = [] + for period in dataset.metadata.periods: + periods_info.append( + { + "period": PERIODS_MAP[period.id], + "play_direction": "left_right", + "start_time": str( + dataset.metadata.date + period.start_timestamp + ), + "end_time": str(dataset.metadata.date + period.end_timestamp), + "start_frame_id": period_tracking["normalized_start_frame_id"][ + period.id + ], + "end_frame_id": period_tracking["normalized_end_frame_id"][ + period.id + ], + "left_team_id": home_team.team_id, + "right_team_id": away_team.team_id, + } + ) + return periods_info + + +def build_whistles(periods_info): + """Build whistle events from period information.""" + whistles = [] + for period in periods_info: + whistles.append( + { + "type": period["period"], + "sub_type": "start", + "time": period["start_time"], + } + ) + whistles.append( + { + "type": period["period"], + "sub_type": "end", + "time": period["end_time"], + } + ) + return whistles + + +def get_starters_and_formation(team, first_frame): + """ + Extract starter IDs and determine formation from first frame. + + Returns: + tuple: (set of starter player IDs, formation string) + """ + team_starters = { + player.player_id + for player, _ in first_frame.players_coordinates.items() + if player.team == team + } + + starters_list = [p for p in team.players if p.player_id in team_starters] + + formation = team.starting_formation or get_starting_formation( + starters_list + ) + + return team_starters, formation + + +def build_team_players_metadata(team, starters): + """Build player metadata for a team.""" + players = [] + for player in team.players: + players.append( + { + "id": player.player_id, + "team_id": team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters, + } + ) + return players diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 99648067d..8fa9e55b4 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -1,79 +1,87 @@ import json import tempfile -from typing import IO, NamedTuple - -from kloppy.domain import Provider, TrackingDataset, PositionType +from typing import IO, NamedTuple, Optional, Union, TYPE_CHECKING + +from kloppy.domain import ( + Provider, + TrackingDataset, + Orientation, + BallState, + CDFCoordinateSystem, + Ground, +) from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer +from .helpers import ( + PERIODS_MAP, + get_player_coordinates, + get_ball_coordinates, + initialize_period_tracking, + update_period_tracking, + get_starters_and_formation, + build_periods_info, + build_whistles, + build_team_players_metadata, +) + +if TYPE_CHECKING: + from cdf.domain.latest.meta import ( + CdfMetaDataSchema, + Stadium, + Competition, + Season, + Meta, + Misc, + ) + +import warnings + +MISSING_MANDATORY_ID = "MISSING_MANDATORY_ID" + class CDFOutputs(NamedTuple): meta_data: IO[bytes] - tracking_data: list[IO[bytes]] + tracking_data: IO[bytes] class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): provider = Provider.CDF - # to infer the starting formation if not given - @staticmethod - def get_starting_formation(team_players) -> str: - """ - determine the starting formation if not define. - - Args: - team: The team on which we want to infer the formation. - - Returns: - formation: the infered formation. - """ - formation = "" - default_formation = "4-3-3" - defender = midfielder = attacker = 0 - for player in team_players: - if player.starting_position.position_group == None: - continue - elif ( - player.starting_position.position_group - == PositionType.Attacker - ): - attacker += 1 - elif ( - player.starting_position.position_group - == PositionType.Midfielder - ): - midfielder += 1 - elif ( - player.starting_position.position_group - == PositionType.Defender - ): - defender += 1 - if defender + midfielder + attacker == 10: - formation = f"{defender}-{midfielder}-{attacker}" - elif defender + midfielder + attacker != 10: - formation = default_formation - return formation - - def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: + def serialize( + self, + dataset: TrackingDataset, + outputs: CDFOutputs, + additional_metadata: Optional[ + Union[ + "CdfMetaDataSchema", + "Stadium", + "Competition", + "Season", + "Meta", + "Misc", + dict, + ] + ] = None, + ) -> bool: """ Serialize a TrackingDataset to Common Data Format. Args: dataset: The tracking dataset to serialize outputs: CDFOutputs containing file handles for metadata and tracking data + additional_metadata: Either a complete CdfMetaDataSchema or partial metadata + dict containing any of: 'competition', 'season', 'stadium', 'meta', 'misc'. + Can also contain direct field updates like {'stadium': {'id': '123'}}. Returns: - bool: True if serialization was successful, False otherwise + bool: True if serialization was successful """ + if all([True for x in dataset if x.ball_state == BallState.ALIVE]): + warnings.warn( + "All frames in 'tracking_dataset' are 'ALIVE', the Common Data Format expects 'DEAD' frames as well. Set `only_alive=False` in your kloppy `.load_tracking()` call to include 'DEAD' frames.", + UserWarning, + ) - from kloppy.domain import ( - Orientation, - BallState, - ) - - # builded coordinateSystem class. - from kloppy.domain.models.common import CDFCoordinateSystem - - # setting it as coordinate system of the imported data dataset = dataset.transform( to_coordinate_system=CDFCoordinateSystem( dataset.metadata.coordinate_system @@ -81,379 +89,233 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: to_orientation=Orientation.STATIC_HOME_AWAY, ) - ## building Tracking jsonl - # list of different periods within a game define by the cdf - periods = { - 1: "first_half", - 2: "second_half", - 3: "first_half_extratime", - 4: "second_half_extratime", - 5: "shootout", - } + period_tracking = initialize_period_tracking(dataset.metadata.periods) + self._home_team, self._away_team = dataset.metadata.teams - # container for start and end frame_id - period_start_frame_id = { - period.id: None for period in dataset.metadata.periods - } - period_end_frame_id = { - period.id: None for period in dataset.metadata.periods - } + self._serialize_tracking_frames( + dataset, + outputs, + period_tracking, + ) - # container for start and end normalized frame_id - normalized_period_start_frame_id = { - period.id: None for period in dataset.metadata.periods - } - normalized_period_end_frame_id = { - period.id: None for period in dataset.metadata.periods - } + self._serialize_metadata( + dataset, + outputs, + period_tracking, + additional_metadata or {}, + ) - # diffence of ids between frame_ids - period_offset = {period.id: 0 for period in dataset.metadata.periods} + return True - # Get home and away team data - home_team, away_team = dataset.metadata.teams + def _serialize_tracking_frames(self, dataset, outputs, period_tracking): + """Serialize tracking data frames to JSONL format. - # Get the players Id. - home_player_ids, away_player_ids = ( - [player.player_id for player in home_team.players], - [player.player_id for player in away_team.players], - ) + Iterates through all frames in the dataset and writes each frame's tracking + data (player positions, ball coordinates, timestamps) directly to the output + JSONL file. - frame_id = 0 # Use for the cdf_frame_ids.. + Args: + dataset: The kloppy tracking dataset containing frames to serialize. + outputs: CDFOutputs object containing the tracking data file handle. + period_tracking: Dictionary containing period frame ID tracking information. + """ for frame in dataset.frames: - frame_data = {} - # Frame ID specified by the CDF - frame_data["frame_id"] = frame_id - # Original frame_id - frame_data["original_frame_id"] = frame.frame_id - # Timestamp - frame_data["timestamp"] = str( - dataset.metadata.date + frame.timestamp - ) - # Period - frame_data["period"] = periods.get(frame.period.id, "unknownn") period_id = frame.period.id - # Update the start and end id for this period - if period_start_frame_id[period_id] is None: - period_start_frame_id[period_id] = frame_data[ - "original_frame_id" - ] - - if ( - period_id > 1 - and period_end_frame_id[period_id - 1] is not None - ): - prev_period_length = ( - period_end_frame_id[period_id - 1] - - period_start_frame_id[period_id - 1] - + 1 - ) - period_offset[period_id] = ( - period_offset[period_id - 1] + prev_period_length - ) - - # Set normalized start frame id - normalized_period_start_frame_id[period_id] = period_offset[ - period_id - ] - - period_end_frame_id[period_id] = frame_data["original_frame_id"] - - normalized_frame_id = ( - frame_data["original_frame_id"] - - period_start_frame_id[period_id] - ) + period_offset[period_id] - - # Update normalized end frame id - normalized_period_end_frame_id[period_id] = normalized_frame_id - - # Match ID - frame_data["match"] = {"id": str(dataset.metadata.game_id)} - # Ball status - frame_data["ball_status"] = frame.ball_state == BallState.ALIVE - - # Teams and players - home_players = [] - for player, coordinates in frame.players_coordinates.items(): - if player.player_id in home_player_ids: - try: - x = coordinates.x - y = coordinates.x - home_players.append( - { - "id": player.player_id, - "x": round(x, 3), - "y": round(y, 3), - "position": player.starting_position.code, - } - ) - except KeyError: - continue - - away_players = [] - for player, coordinates in frame.players_coordinates.items(): - if player.player_id in away_player_ids: - try: - x = coordinates.x - y = coordinates.x - away_players.append( - { - "id": player.player_id, - "x": round(x, 3), - "y": round(y, 3), - "position": player.starting_position.code, - } - ) - except KeyError: - continue - - # teams within the tracking data. - - home_players_id = [] - away_players_id = [] - for player, _ in frame.players_coordinates.items(): - if player.team == home_team: - home_players_id.append(player.player_id) - if player.team == away_team: - away_players_id.append(player.player_id) - set_of_home_players_id_in_the_frame = set(home_players_id) - set_of_away_players_id_in_the_frame = set(away_players_id) - - frame_data["teams"] = { + ball_status = frame.ball_state == BallState.ALIVE + + normalized_frame_id = update_period_tracking( + period_tracking, period_id, frame.frame_id + ) + + home_players = get_player_coordinates(frame, Ground.HOME) + away_players = get_player_coordinates(frame, Ground.AWAY) + + if period_id not in PERIODS_MAP: + raise ValueError( + f"Incorrect period_id {period_id}. Period ID {period_id} this is not supported by the Common Data Format" + ) + + frame_data = { + "frame_id": normalized_frame_id, + "original_frame_id": frame.frame_id, + "timestamp": str(dataset.metadata.date + frame.timestamp), + "period": PERIODS_MAP[period_id], + "match": {"id": str(dataset.metadata.game_id)}, + "ball_status": ball_status, + "teams": { + "home": { + "id": self._home_team.team_id, + "players": home_players, + "name": self._home_team.name, + }, + "away": { + "id": self._away_team.team_id, + "players": away_players, + "name": self._away_team.name, + }, + }, + "ball": get_ball_coordinates(frame), + } + + outputs.tracking_data.write( + (json.dumps(frame_data) + "\n").encode("utf-8") + ) + + def _build_default_metadata_structure( + self, + dataset, + period_tracking, + ) -> "CdfMetaDataSchema": + """Build default CDF metadata structure from dataset.""" + first_frame = dataset[0] + + home_starters, home_formation = get_starters_and_formation( + self._home_team, first_frame + ) + away_starters, away_formation = get_starters_and_formation( + self._away_team, first_frame + ) + + periods_info = build_periods_info( + dataset, period_tracking, self._home_team, self._away_team + ) + + whistles = build_whistles(periods_info) + + return { + "competition": { + "id": MISSING_MANDATORY_ID, + }, + "season": { + "id": MISSING_MANDATORY_ID, + }, + "stadium": { + "id": MISSING_MANDATORY_ID, + "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, + "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, + }, + "match": { + "id": str(dataset.metadata.game_id), + "kickoff_time": str( + dataset.metadata.date + + dataset.metadata.periods[0].start_timestamp + ), + "periods": periods_info, + "whistles": whistles, + "scheduled_kickoff_time": str(dataset.metadata.date), + }, + "teams": { "home": { - "id": home_team.team_id, - "players": home_players, - "jersey_color": " ", # - "name": home_team.name, - "formation": ( - home_team.formations.at_start() - if home_team.formations.items - else self.get_starting_formation( - [ - p - for p in home_team.players - if p.player_id - in set_of_home_players_id_in_the_frame - ] - ) + "id": self._home_team.team_id, + "players": build_team_players_metadata( + self._home_team, home_starters ), + "name": self._home_team.name, + "formation": home_formation, }, "away": { - "id": away_team.team_id, - "players": away_players, - "jersey_color": " ", - "name": away_team.name, - "formation": ( - away_team.formations.at_start() - if away_team.formations.items - else self.get_starting_formation( - [ - p - for p in away_team.players - if p.player_id - in set_of_away_players_id_in_the_frame - ] - ) + "id": self._away_team.team_id, + "players": build_team_players_metadata( + self._away_team, away_starters ), + "name": self._away_team.name, + "formation": away_formation, }, - } + }, + "meta": { + "video": None, + "tracking": None, + "landmarks": None, + "ball": None, + "meta": None, + "cdf": None, + }, + } + + def _deep_merge_metadata(self, base: dict, updates: dict) -> dict: + """ + Deep merge metadata updates into base metadata. + + Args: + base: Base metadata dictionary + updates: Updates to apply (can be nested) + + Returns: + Merged metadata dictionary + """ + result = base.copy() - # Ball + for key, value in updates.items(): if ( - frame_data["ball_status"] == True - and frame.ball_coordinates is not None + key in result + and isinstance(result[key], dict) + and isinstance(value, dict) ): - try: - ball_x = round(frame.ball_coordinates.x, 3) - ball_y = round(frame.ball_coordinates.y, 3) - ball_z = round(frame.ball_coordinates.z, 3) - except KeyError: - ball_x = ball_y = ball_z = None + result[key] = self._deep_merge_metadata(result[key], value) else: - ball_x = ( - ball_y - ) = ball_z = 404 # default missing value for ball coordinates + result[key] = value - frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} + return result - # update the frame_id - frame_id += 1 + def _internal_validation_metadata( + self, metadata: dict, path: str = "" + ) -> None: + """ + Validate metadata and warn about missing mandatory IDs. - # build a temporary jsonl for each frame - frame_file = tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) - frame_file.write((json.dumps(frame_data) + "\n").encode("utf-8")) - frame_file.flush() # make sure data is written - - # Add to tracking list - outputs.tracking_data.append(frame_file) - - ###################### build now the metadata. - # Output containers - metadata_json = {} - # Competition infos. - metadata_json["competition"] = { - "id": "MISSING_MANDATORY_COMPETITION_ID", - "name": "", - "format": "", - "age_restriction": "", - "type": "", - } + Args: + metadata: Metadata dictionary to validate + path: Current path in the metadata structure (for nested dicts) + """ + for key, value in metadata.items(): + current_path = f"{path}.{key}" if path else key + + if value == MISSING_MANDATORY_ID: + warnings.warn( + f"Missing mandatory ID at '{current_path}'. Currently replaced with the value '{MISSING_MANDATORY_ID}'. " + f"Please provide the correct value to 'additional_metadata' to completely adhere to the CDF specification.", + UserWarning, + ) + elif isinstance(value, dict): + self._internal_validation_metadata(value, current_path) + elif isinstance(value, list): + for i, item in enumerate(value): + if isinstance(item, dict): + self._internal_validation_metadata( + item, f"{current_path}[{i}]" + ) - # season infos. - metadata_json["season"] = { - "id": "MISSING_MANDATORY_SEASON_ID", - "name": "", - } + def _serialize_metadata( + self, + dataset, + outputs, + period_tracking, + additional_metadata: dict, + ): + """ + Serialize metadata to JSON format. - # match infos. - periods_info = [] - for period in dataset.metadata.periods: - curent_period = { - "period": periods[period.id], - "play_direction": "left_right", - "start_time": str( - dataset.metadata.date + period.start_timestamp - ), - "end_time": str(dataset.metadata.date + period.end_timestamp), - "start_frame_id": normalized_period_start_frame_id[period.id], - "end_frame_id": normalized_period_end_frame_id[period.id], - "left_team_id": home_team.team_id, - "right_team_id": away_team.team_id, - } - periods_info.append(curent_period) - - ## building team_players for metadata - meta_home_players = [] - starters_ids = [] - for player, coordinates in dataset[0].players_coordinates.items(): - starters_ids.append(player.player_id) - - for player in home_team.players: - try: - meta_home_players.append( - { - "id": player.player_id, - "team_id": home_team.team_id, - "jersey_number": player.jersey_no, - "is_starter": player.player_id in starters_ids, - } - ) - except KeyError: - continue - - meta_away_players = [] - for player in away_team.players: - try: - meta_away_players.append( - { - "id": player.player_id, - "team_id": away_team.team_id, - "jersey_number": player.jersey_no, - "is_starter": player.player_id in starters_ids, - } - ) - except KeyError: - continue - - # get whistles related to period directly from them. - whistles = [] - for period in periods_info: - whistle_start = {} - whistle_end = {} - # type - whistle_start["type"] = period["period"] - whistle_end["type"] = period["period"] - # sub_type - whistle_start["sub_type"] = "start" - whistle_end["sub_type"] = "end" - # time - whistle_start["time"] = period["start_time"] - whistle_end["time"] = period["end_time"] - whistles.append(whistle_start) - whistles.append(whistle_end) - - metadata_json["match"] = { - "id": str(dataset.metadata.game_id), - "kickoff_time": str( - dataset.metadata.date - + dataset.metadata.periods[0].start_timestamp - ), - "periods": periods_info, - "whistles": whistles, - "round": "", - "scheduled_kickoff_time": str(dataset.metadata.date), - "local_kickoff_time": "", - "misc": { - "country": "", - "city": "", - "percipitation": 0, - "is_open_roof": True, # Asume as default value - }, - } + Builds and writes the complete metadata JSON including competition, season, + match information, periods, whistles, team rosters with formations, and + stadium dimensions. Accepts additional metadata for overrides. - home_players_id_in_meta = [] - away_players_id_in_meta = [] - for player, _ in dataset[0].players_coordinates.items(): - if player.team == home_team: - home_players_id_in_meta.append(player.player_id) - if player.team == away_team: - away_players_id_in_meta.append(player.player_id) - meta_set_of_home_players_id_in_the_frame = set(home_players_id_in_meta) - meta_set_of_away_players_id_in_the_frame = set(away_players_id_in_meta) - - metadata_json["teams"] = { - "home": { - "id": home_team.team_id, - "players": meta_home_players, - "jersey_color": " ", - "name": home_team.name, - "formation": home_team.starting_formation - or self.get_starting_formation( - [ - p - for p in home_team.players - if p.player_id - in meta_set_of_home_players_id_in_the_frame - ] - ), - }, - "away": { - "id": away_team.team_id, - "players": meta_away_players, - "jersey_color": " ", - "name": away_team.name, - "formation": away_team.starting_formation - or self.get_starting_formation( - [ - p - for p in away_team.players - if p.player_id - in meta_set_of_away_players_id_in_the_frame - ] - ), - }, - } + Args: + dataset: The tracking dataset containing metadata to serialize. + outputs: CDFOutputs object containing the metadata file handle. + period_tracking: Dictionary containing normalized period frame IDs. + additional_metadata: Additional or override metadata following CdfMetaDataSchema. + """ + metadata_json = self._build_default_metadata_structure( + dataset, period_tracking + ) - metadata_json["stadium"] = { - "id": "MISSING_MANDATORY_STADIUM_ID", - "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, - "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, - "name": "", - "turf": "", - } + if additional_metadata: + metadata_json = self._deep_merge_metadata( + metadata_json, additional_metadata + ) - metadata_json["meta"] = { - "video": None, - "tracking": None, - "landmarks": None, - "meta": None, - "cdf": None, - } + self._internal_validation_metadata(metadata_json) outputs.meta_data.write( (json.dumps(metadata_json) + "\n").encode("utf-8") ) - - return True diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 67d07cec4..28d49dcaa 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -3,7 +3,9 @@ import pytest import cdf -from cdf import VERSION + +import json +import warnings from kloppy import sportec from kloppy.domain import TrackingDataset @@ -12,6 +14,7 @@ CDFOutputs, ) + class TestCDFSerializer: @pytest.fixture def raw_data(self, base_dir) -> Path: @@ -37,30 +40,248 @@ def test_produces_valid_cdf_output(self, dataset): serializer = CDFTrackingDataSerializer() # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator(schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json") - tracking_validator = cdf.TrackingSchemaValidator(schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json") + meta_validator = cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + ) + tracking_validator = cdf.TrackingSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + ) + + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: - with tempfile.NamedTemporaryFile(mode="w+b", suffix=".json", delete=False) as meta_file: - # Initialize empty list for tracking files - tracking_files: list[tempfile._TemporaryFileWrapper] = [] # Instantiate the named tuple for outputs outputs = CDFOutputs( - meta_data=meta_file, - tracking_data=tracking_files + meta_data=meta_file, tracking_data=tracking_file ) - # Serialize the dataset - success = serializer.serialize(dataset, outputs) - assert success is True + + # Serialize the dataset and capture warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + success = serializer.serialize(dataset, outputs) + assert success is True + + # Verify warnings about missing mandatory IDs were raised + missing_id_warnings = [ + warning + for warning in w + if issubclass(warning.category, UserWarning) + and "Missing mandatory ID" in str(warning.message) + ] + + # Should have warnings for competition.id, season.id, and stadium.id + assert len(missing_id_warnings) == 3, ( + f"Expected 3 missing mandatory ID warnings, but got {len(missing_id_warnings)}: " + f"{[str(warning.message) for warning in missing_id_warnings]}" + ) + + # Check specific warnings are present + warning_messages = [ + str(warning.message) for warning in missing_id_warnings + ] + assert any( + "competition.id" in msg for msg in warning_messages + ), "Missing warning for competition.id" + assert any( + "season.id" in msg for msg in warning_messages + ), "Missing warning for season.id" + assert any( + "stadium.id" in msg for msg in warning_messages + ), "Missing warning for stadium.id" + # Save paths for validation after leaving the block meta_path = meta_file.name - tracking_paths = [f.name for f in outputs.tracking_data] + tracking_path = tracking_file.name # Validate metadata meta_validator.validate_schema(sample=meta_path) - # Validate all tracking frame files - for path in tracking_paths: - tracking_validator.validate_schema(sample=path) + # Validate tracking data - read and validate each line (frame) in the JSONL file + with open(tracking_path, "r") as f: + frame_count = 0 + for line in f: + if line.strip(): # Skip empty lines + frame_data = json.loads(line) + # Validate each frame against the tracking schema + tracking_validator.validate_schema(sample=frame_data) + frame_count += 1 + + assert frame_count > 0, "No frames were serialized" + + # Clean up + Path(meta_path).unlink() + Path(tracking_path).unlink() + + def test_produces_valid_cdf_output_with_additional_metadata(self, dataset): + """Test that CDFTrackingDataSerializer produces valid CDF output with additional metadata.""" + serializer = CDFTrackingDataSerializer() + + # Instantiate Validators + meta_validator = cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + ) + tracking_validator = cdf.TrackingSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + ) + + # Define additional metadata + additional_metadata = { + "competition": { + "id": "COMP_123", + "name": "Test Competition", + "format": "league_20", + }, + "season": {"id": "SEASON_2024", "name": "2024/25"}, + "stadium": { + "id": "STADIUM_456", + "name": "Test Arena", + "turf": "grass", + }, + "meta": { + "tracking": { + "version": "2.0.0", + "name": "TestTracker", + "fps": 30, + "collection_timing": "live", + } + }, + } + + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: + + # Instantiate the named tuple for outputs + outputs = CDFOutputs( + meta_data=meta_file, tracking_data=tracking_file + ) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + success = serializer.serialize( + dataset, outputs, additional_metadata=additional_metadata + ) + assert success is True + + # Verify no warnings about missing mandatory IDs were raised + missing_id_warnings = [ + warning + for warning in w + if issubclass(warning.category, UserWarning) + and "Missing mandatory ID" in str(warning.message) + ] + assert len(missing_id_warnings) == 0, ( + f"Expected no missing mandatory ID warnings, but got {len(missing_id_warnings)}: " + f"{[str(warning.message) for warning in missing_id_warnings]}" + ) + + # Save paths for validation after leaving the block + meta_path = meta_file.name + tracking_path = tracking_file.name + + # Validate metadata + meta_validator.validate_schema(sample=meta_path) + + # Verify additional metadata was applied correctly + with open(meta_path, "r") as f: + meta_data = json.load(f) + + # Check competition metadata + assert meta_data["competition"]["id"] == "COMP_123" + assert meta_data["competition"]["name"] == "Test Competition" + assert meta_data["competition"]["format"] == "league_20" + + # Check season metadata + assert meta_data["season"]["id"] == "SEASON_2024" + assert meta_data["season"]["name"] == "2024/25" + + # Check stadium metadata + assert meta_data["stadium"]["id"] == "STADIUM_456" + assert meta_data["stadium"]["name"] == "Test Arena" + assert meta_data["stadium"]["turf"] == "grass" + # Verify default values still present + assert "pitch_length" in meta_data["stadium"] + assert "pitch_width" in meta_data["stadium"] + + # Check meta tracking information + assert meta_data["meta"]["tracking"]["version"] == "2.0.0" + assert meta_data["meta"]["tracking"]["name"] == "TestTracker" + assert meta_data["meta"]["tracking"]["fps"] == 30 + assert meta_data["meta"]["tracking"]["collection_timing"] == "live" + + # Validate tracking data - read and validate each line (frame) in the JSONL file + with open(tracking_path, "r") as f: + frame_count = 0 + for line in f: + if line.strip(): # Skip empty lines + frame_data = json.loads(line) + # Validate each frame against the tracking schema + tracking_validator.validate_schema(sample=frame_data) + frame_count += 1 + + assert frame_count > 0, "No frames were serialized" + + # Clean up + Path(meta_path).unlink() + Path(tracking_path).unlink() + + def test_serializer_handles_invalid_metadata_types(self, dataset): + """Test that CDFTrackingDataSerializer handles invalid metadata types gracefully.""" + serializer = CDFTrackingDataSerializer() + + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: + + meta_path = meta_file.name + tracking_path = tracking_file.name + + outputs = CDFOutputs( + meta_data=meta_file, tracking_data=tracking_file + ) + + # Test with invalid metadata types - should still serialize but may fail validation + invalid_metadata = { + "competition": { + "id": 123, # Should be string + }, + "season": { + "id": ["2024"], # Should be string, not list + }, + "stadium": { + "id": None, # Should be string + "pitch_length": "one hundred five", # Should be float/int + }, + "meta": { + "tracking": { + "fps": "25", # Should be int + "version": 1.0, # Should be string + } + }, + } + + # Serialization should succeed (no type checking in serializer) + success = serializer.serialize( + dataset, outputs, additional_metadata=invalid_metadata + ) + assert success is True + + # The file should be created but validation should fail + meta_validator = cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + ) + + # Validation should fail due to type mismatches + with pytest.raises(Exception): # Could be ValidationError or similar + meta_validator.validate_schema(sample=meta_path) + + # Clean up Path(meta_path).unlink() - for path in tracking_paths: - Path(path).unlink() \ No newline at end of file + Path(tracking_path).unlink() diff --git a/setup.py b/setup.py index 7b74060c1..4a5d85590 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ def setup_package(): "flask", "flask-cors", "pytest-httpserver", - "common-data-format-validator @ git+https://github.com/koenvo/common-data-format-validator.git@bugfix/packaging", + "common-data-format-validator>=0.0.11", ], "development": ["pre-commit==2.6.0"], "query": ["networkx>=2.4,<3"], From fbf20c6085d3b58e9dfcd505be78b66bc3d23c71 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Fri, 14 Nov 2025 11:20:24 +0100 Subject: [PATCH 03/16] WIP: add write support --- kloppy/infra/io/adapters/adapter.py | 17 ++++ kloppy/infra/io/adapters/fsspec.py | 29 ++++++- kloppy/io.py | 117 ++++++++++++++++++++++------ 3 files changed, 136 insertions(+), 27 deletions(-) diff --git a/kloppy/infra/io/adapters/adapter.py b/kloppy/infra/io/adapters/adapter.py index 7d00212ab..754ccad0b 100644 --- a/kloppy/infra/io/adapters/adapter.py +++ b/kloppy/infra/io/adapters/adapter.py @@ -19,6 +19,23 @@ def is_file(self, url: str) -> bool: def read_to_stream(self, url: str, output: BinaryIO): pass + def write_from_stream(self, url: str, input: BinaryIO, mode: str): + """ + Write content from input stream to the given URL. + + Args: + url: The destination URL + input: Binary stream to read from + mode: Write mode ('wb' for write/overwrite or 'ab' for append) + + Raises: + NotImplementedError: If write operations are not supported by this adapter + """ + raise NotImplementedError( + f"Write operations not supported for {url}. " + f"Adapter {self.__class__.__name__} does not implement write_from_stream." + ) + @abstractmethod def list_directory(self, url: str, recursive: bool = True) -> List[str]: pass diff --git a/kloppy/infra/io/adapters/fsspec.py b/kloppy/infra/io/adapters/fsspec.py index f303913e8..83c82fc36 100644 --- a/kloppy/infra/io/adapters/fsspec.py +++ b/kloppy/infra/io/adapters/fsspec.py @@ -6,6 +6,7 @@ from kloppy.config import get_config from kloppy.exceptions import InputNotFoundError +from kloppy.infra.io.buffered_stream import BufferedStream from .adapter import Adapter @@ -65,17 +66,41 @@ def supports(self, url: str) -> bool: def read_to_stream(self, url: str, output: BinaryIO): """ Reads content from the given URL and writes it to the provided binary stream. - Uses caching for remote files. + Uses caching for remote files. Copies data in chunks via BufferedStream. """ fs = self._get_filesystem(url) compression = self._detect_compression(url) try: with fs.open(url, "rb", compression=compression) as source_file: - output.write(source_file.read()) + buffer = BufferedStream.from_stream(source_file) + output.write(buffer.read()) except FileNotFoundError as e: raise InputNotFoundError(f"Input file not found: {url}") from e + def write_from_stream(self, url: str, input: BinaryIO, mode: str): + """ + Writes content from input stream to the given URL. + Does not use caching for writes. Copies data in chunks. + + Args: + url: The destination URL + input: BufferedStream to read from + mode: Write mode ('wb' for write/overwrite or 'ab' for append) + """ + fs = self._get_filesystem(url, no_cache=True) + compression = self._detect_compression(url) + + with fs.open(url, mode, compression=compression) as dest_file: + # Assume input is a BufferedStream with write_to method + if isinstance(input, BufferedStream): + input.write_to(dest_file) + else: + # Fallback: wrap in BufferedStream + input.seek(0) + buffer = BufferedStream.from_stream(input) + buffer.write_to(dest_file) + def list_directory(self, url: str, recursive: bool = True) -> List[str]: """ Lists the contents of a directory. diff --git a/kloppy/io.py b/kloppy/io.py index c6dd30180..c60f74fd1 100644 --- a/kloppy/io.py +++ b/kloppy/io.py @@ -7,6 +7,8 @@ import lzma import os import re +import shutil +import tempfile from dataclasses import dataclass, replace from io import BufferedWriter, BytesIO, TextIOWrapper from typing import ( @@ -26,6 +28,7 @@ from kloppy.exceptions import AdapterError, InputNotFoundError from kloppy.infra.io.adapters import get_adapter +from kloppy.infra.io.buffered_stream import BufferedStream logger = logging.getLogger(__name__) @@ -321,8 +324,35 @@ def dummy_context_mgr() -> Generator[None, None, None]: yield -def open_as_file(input_: FileLike) -> ContextManager[Optional[BinaryIO]]: - """Open a byte stream to the given input object. +@contextlib.contextmanager +def _write_context_manager( + uri: str, mode: str +) -> Generator[BinaryIO, None, None]: + """ + Context manager for write operations that buffers writes and flushes to adapter on exit. + + Args: + uri: The destination URI + mode: Write mode ('wb' or 'ab') + + Yields: + A BufferedStream for writing + """ + buffer = BufferedStream() + try: + yield buffer + finally: + adapter = get_adapter(uri) + if adapter: + adapter.write_from_stream(uri, buffer, mode) + else: + raise AdapterError(f"No adapter found for {uri}") + + +def open_as_file( + input_: FileLike, mode: str = "rb" +) -> ContextManager[Optional[BinaryIO]]: + """Open a byte stream to/from the given input object. The following input types are supported: - A string or `pathlib.Path` object representing a local file path. @@ -338,37 +368,54 @@ def open_as_file(input_: FileLike) -> ContextManager[Optional[BinaryIO]]: input types. Args: - input_ (FileLike): The input object to be opened. + input_ (FileLike): The input/output object to be opened. + mode (str): File mode - 'rb' (read), 'wb' (write), or 'ab' (append). + Defaults to 'rb'. Returns: - BinaryIO: A binary stream to the input object. + BinaryIO: A binary stream to/from the input object. Raises: - ValueError: If the input is required but not provided. + ValueError: If the input is required but not provided, or invalid mode. InputNotFoundError: If the input file is not found and should not be skipped. TypeError: If the input type is not supported. + NotImplementedError: If write mode is used with unsupported input types. Example: + >>> # Reading >>> with open_as_file("example.txt") as f: ... contents = f.read() + >>> + >>> # Writing + >>> with open_as_file("output.txt", mode="wb") as f: + ... f.write(b"Hello, world!") Note: To support reading data from other sources, see the [Adapter](`kloppy.io.adapters.Adapter`) class. If the given file path or URL ends with '.gz', '.xz', or '.bz2', the - file will be decompressed before being read. + file will be automatically compressed/decompressed. + + Write mode limitations: + - HTTP/HTTPS URLs: Not supported + - Zip archives: Not supported + - Inline strings/bytes: Not supported (invalid output destination) """ + # Validate mode + if mode not in ("rb", "wb", "ab"): + raise ValueError(f"Mode '{mode}' not supported. Use 'rb', 'wb', or 'ab'.") + + # Handle Source wrapper if isinstance(input_, Source): if input_.data is None and input_.optional: - # This saves us some additional code in every vendor specific code return dummy_context_mgr() elif input_.data is None: raise ValueError("Input required but not provided.") else: try: - return open_as_file(input_.data) + return open_as_file(input_.data, mode=mode) except InputNotFoundError as exc: if input_.skip_if_missing: logging.info(f"Input {input_.data} not found. Skipping") @@ -376,34 +423,54 @@ def open_as_file(input_: FileLike) -> ContextManager[Optional[BinaryIO]]: else: raise exc - if isinstance(input_, str) and ("{" in input_ or "<" in input_): - # If input_ is a JSON or XML string, return it as a binary stream - return BytesIO(input_.encode("utf8")) - - if isinstance(input_, bytes): - # If input_ is a bytes object, return it as a binary stream - return BytesIO(input_) - + # Write modes: Cannot write to inline data + if mode in ("wb", "ab"): + if isinstance(input_, str) and ("{" in input_ or "<" in input_): + raise TypeError("Cannot write to inline JSON/XML string.") + if isinstance(input_, bytes): + raise TypeError("Cannot write to bytes object. Use BytesIO instead.") + + # Read modes: Handle inline data + if mode == "rb": + if isinstance(input_, str) and ("{" in input_ or "<" in input_): + return BytesIO(input_.encode("utf8")) + if isinstance(input_, bytes): + return BytesIO(input_) + + # Handle paths (local files, URLs, S3, etc.) if isinstance(input_, str) or hasattr(input_, "__fspath__"): - # If input_ is a path-like object, open it and return the binary stream uri = _filepath_from_path_or_filelike(input_) adapter = get_adapter(uri) - if adapter: - stream = BytesIO() + if not adapter: + raise AdapterError(f"No adapter found for {uri}") + + if mode == "rb": + # Read mode: buffer data from adapter + stream = BufferedStream() adapter.read_to_stream(uri, stream) stream.seek(0) + return stream else: - raise AdapterError(f"No adapter found for {uri}") - return stream + # Write mode: return context manager that flushes on exit + return _write_context_manager(uri, mode) + # Handle file-like objects if isinstance(input_, TextIOWrapper): - # If file_or_path is a TextIOWrapper, return its underlying binary buffer return input_.buffer - if hasattr(input_, "readinto"): - # If file_or_path is a file-like object, return it as is - return _open(input_) # type: ignore + if hasattr(input_, "readinto") or (mode in ("wb", "ab") and hasattr(input_, "write")): + # File-like object (BytesIO, file handles, etc.) + if hasattr(input_, "mode") and input_.mode != mode: # type: ignore + # If it's a real file with a mode, check compatibility + raise ValueError(f"File opened in mode '{input_.mode}' but '{mode}' requested") # type: ignore + + # Use _open to handle potential compression detection + if mode == "rb": + return _open(input_, mode) # type: ignore + else: + # For write modes, return file-like object directly with nullcontext + return contextlib.nullcontext(input_) # type: ignore raise TypeError(f"Unsupported input type: {type(input_)}") From b5d77086a2ab5230d9d30e09e2b9cd11b23b261d Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Fri, 14 Nov 2025 17:44:26 +0100 Subject: [PATCH 04/16] Add write support to open_as_file with chunked buffering This adds comprehensive write support to the open_as_file() function with efficient memory management and streaming capabilities. Key features: - BufferedStream: SpooledTemporaryFile wrapper with chunked I/O (5MB memory threshold) - Write modes: 'wb' (write), 'ab' (append) - binary only - Adapter pattern: write_from_stream() method (opt-in for adapters) - Compression support: .gz, .bz2, .xz files handled automatically - Local files and S3 URIs supported via FSSpecAdapter - Protocols for type safety: SupportsRead, SupportsWrite Implementation details: - read_from()/write_to() methods use shutil.copyfileobj for chunked copying - Context manager pattern buffers writes and flushes on exit - No breaking changes to existing read functionality --- kloppy/infra/io/adapters/adapter.py | 10 +- kloppy/infra/io/adapters/fsspec.py | 23 ++--- kloppy/infra/io/buffered_stream.py | 78 +++++++++++++++ kloppy/io.py | 12 ++- kloppy/tests/test_write_support.py | 147 ++++++++++++++++++++++++++++ 5 files changed, 248 insertions(+), 22 deletions(-) create mode 100644 kloppy/infra/io/buffered_stream.py create mode 100644 kloppy/tests/test_write_support.py diff --git a/kloppy/infra/io/adapters/adapter.py b/kloppy/infra/io/adapters/adapter.py index 754ccad0b..31abb8f70 100644 --- a/kloppy/infra/io/adapters/adapter.py +++ b/kloppy/infra/io/adapters/adapter.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod from typing import BinaryIO, List +from kloppy.infra.io.buffered_stream import BufferedStream + class Adapter(ABC): @abstractmethod @@ -16,16 +18,16 @@ def is_file(self, url: str) -> bool: pass @abstractmethod - def read_to_stream(self, url: str, output: BinaryIO): + def read_to_stream(self, url: str, output: BufferedStream): pass - def write_from_stream(self, url: str, input: BinaryIO, mode: str): + def write_from_stream(self, url: str, input: BufferedStream, mode: str): """ - Write content from input stream to the given URL. + Write content from BufferedStream to the given URL. Args: url: The destination URL - input: Binary stream to read from + input: BufferedStream to read from mode: Write mode ('wb' for write/overwrite or 'ab' for append) Raises: diff --git a/kloppy/infra/io/adapters/fsspec.py b/kloppy/infra/io/adapters/fsspec.py index 83c82fc36..e2cfe2569 100644 --- a/kloppy/infra/io/adapters/fsspec.py +++ b/kloppy/infra/io/adapters/fsspec.py @@ -6,6 +6,7 @@ from kloppy.config import get_config from kloppy.exceptions import InputNotFoundError + from kloppy.infra.io.buffered_stream import BufferedStream from .adapter import Adapter @@ -63,24 +64,23 @@ def supports(self, url: str) -> bool: Check if the adapter can handle the URL. """ - def read_to_stream(self, url: str, output: BinaryIO): + def read_to_stream(self, url: str, output: BufferedStream): """ - Reads content from the given URL and writes it to the provided binary stream. - Uses caching for remote files. Copies data in chunks via BufferedStream. + Reads content from the given URL and writes it to the provided BufferedStream. + Uses caching for remote files. Copies data in chunks. """ fs = self._get_filesystem(url) compression = self._detect_compression(url) try: with fs.open(url, "rb", compression=compression) as source_file: - buffer = BufferedStream.from_stream(source_file) - output.write(buffer.read()) + output.read_from(source_file) except FileNotFoundError as e: raise InputNotFoundError(f"Input file not found: {url}") from e - def write_from_stream(self, url: str, input: BinaryIO, mode: str): + def write_from_stream(self, url: str, input: BufferedStream, mode: str): """ - Writes content from input stream to the given URL. + Writes content from BufferedStream to the given URL. Does not use caching for writes. Copies data in chunks. Args: @@ -92,14 +92,7 @@ def write_from_stream(self, url: str, input: BinaryIO, mode: str): compression = self._detect_compression(url) with fs.open(url, mode, compression=compression) as dest_file: - # Assume input is a BufferedStream with write_to method - if isinstance(input, BufferedStream): - input.write_to(dest_file) - else: - # Fallback: wrap in BufferedStream - input.seek(0) - buffer = BufferedStream.from_stream(input) - buffer.write_to(dest_file) + input.write_to(dest_file) def list_directory(self, url: str, recursive: bool = True) -> List[str]: """ diff --git a/kloppy/infra/io/buffered_stream.py b/kloppy/infra/io/buffered_stream.py new file mode 100644 index 000000000..d2d65f963 --- /dev/null +++ b/kloppy/infra/io/buffered_stream.py @@ -0,0 +1,78 @@ +"""Buffered stream utilities for efficient I/O operations.""" + +import shutil +import tempfile +from typing import BinaryIO, Protocol + +DEFAULT_BUFFER_SIZE = 5 * 1024 * 1024 # 5MB before spilling to disk + + +class SupportsWrite(Protocol): + """Protocol for objects that support write operations.""" + + def write(self, data: bytes) -> int: + ... + + +class SupportsRead(Protocol): + """Protocol for objects that support read operations.""" + + def read(self, n: int) -> bytes: + ... + + +class BufferedStream(tempfile.SpooledTemporaryFile): + """A spooled temporary file that can efficiently copy from streams in chunks.""" + + def __init__(self, max_size: int = DEFAULT_BUFFER_SIZE, mode: str = "w+b"): + super().__init__(max_size=max_size, mode=mode) + + def write(self, data: bytes) -> int: # make it clearly bytes-only + return super().write(data) + + def read(self, n: int = -1) -> bytes: # make it clearly bytes-only + return super().read(n) + + @classmethod + def from_stream( + cls, + source: BinaryIO, + max_size: int = DEFAULT_BUFFER_SIZE, + chunk_size: int = 0, + ) -> "BufferedStream": + """ + Create a BufferedStream by copying data from source stream in chunks. + + Args: + source: The source binary stream to read from + max_size: Maximum size to keep in memory before spilling to disk + chunk_size: Size of chunks to keep in memory before spilling to disk + + Returns: + A BufferedStream containing the copied data + """ + buffer = cls(max_size=max_size) + buffer.read_from(source, chunk_size) + return buffer + + def read_from(self, source: SupportsRead, chunk_size: int = 0): + """ + Read data from source into this BufferedStream in chunks. + + Args: + source: The source that supports read() method + chunk_size: Size of chunks to copy at a time (0 uses default) + """ + shutil.copyfileobj(source, self, chunk_size) + self.seek(0) + + def write_to(self, output: SupportsWrite, chunk_size: int = 0) -> None: + """ + Write all contents of this BufferedStream to the output in chunks. + + Args: + output: The destination that supports write() method + chunk_size: Size of chunks to keep in memory before spilling to disk + """ + self.seek(0) + shutil.copyfileobj(self, output, chunk_size) diff --git a/kloppy/io.py b/kloppy/io.py index c60f74fd1..aa8bb3bfb 100644 --- a/kloppy/io.py +++ b/kloppy/io.py @@ -405,7 +405,9 @@ def open_as_file( """ # Validate mode if mode not in ("rb", "wb", "ab"): - raise ValueError(f"Mode '{mode}' not supported. Use 'rb', 'wb', or 'ab'.") + raise ValueError( + f"Mode '{mode}' not supported. Use 'rb', 'wb', or 'ab'." + ) # Handle Source wrapper if isinstance(input_, Source): @@ -428,7 +430,9 @@ def open_as_file( if isinstance(input_, str) and ("{" in input_ or "<" in input_): raise TypeError("Cannot write to inline JSON/XML string.") if isinstance(input_, bytes): - raise TypeError("Cannot write to bytes object. Use BytesIO instead.") + raise TypeError( + "Cannot write to bytes object. Use BytesIO instead." + ) # Read modes: Handle inline data if mode == "rb": @@ -459,7 +463,9 @@ def open_as_file( if isinstance(input_, TextIOWrapper): return input_.buffer - if hasattr(input_, "readinto") or (mode in ("wb", "ab") and hasattr(input_, "write")): + if hasattr(input_, "readinto") or ( + mode in ("wb", "ab") and hasattr(input_, "write") + ): # File-like object (BytesIO, file handles, etc.) if hasattr(input_, "mode") and input_.mode != mode: # type: ignore # If it's a real file with a mode, check compatibility diff --git a/kloppy/tests/test_write_support.py b/kloppy/tests/test_write_support.py new file mode 100644 index 000000000..f625f9575 --- /dev/null +++ b/kloppy/tests/test_write_support.py @@ -0,0 +1,147 @@ +import bz2 +import gzip +import lzma +from io import BytesIO +from pathlib import Path +from typing import BinaryIO, List + +import pytest + +from kloppy.io import open_as_file +from kloppy.infra.io.buffered_stream import BufferedStream +from kloppy.infra.io.adapters import Adapter + + +class TestBufferedStream: + """Tests for BufferedStream chunked copying.""" + + def test_from_stream_small_data(self): + """It should copy small data in chunks and keep in memory.""" + source = BytesIO(b"Small data content") + buffer = BufferedStream.from_stream(source, chunk_size=8) + + assert buffer.read() == b"Small data content" + assert buffer._rolled is False # Still in memory + + def test_from_stream_large_data(self): + """It should spill large data to disk.""" + buffer_size = 5 * 1024 * 1024 # 5MB + large_data = b"x" * (buffer_size + 1000) + source = BytesIO(large_data) + buffer = BufferedStream.from_stream(source, max_size=buffer_size) + + assert buffer._rolled is True # Spilled to disk + assert buffer.read() == large_data + + +class MockWriteAdapter(Adapter): + """Mock adapter for testing write support.""" + + def __init__(self): + self.written_data = {} + + def supports(self, url: str) -> bool: + return url.startswith("mock://") + + def is_directory(self, url: str) -> bool: + return False + + def is_file(self, url: str) -> bool: + return url in self.written_data + + def read_to_stream(self, url: str, output: BinaryIO): + if url in self.written_data: + output.write(self.written_data[url]) + else: + raise FileNotFoundError(f"Mock file not found: {url}") + + def write_from_stream(self, url: str, input: BinaryIO, mode: str): + """Write data from input stream to mock storage.""" + input.seek(0) + self.written_data[url] = input.read() + + def list_directory(self, url: str, recursive: bool = True) -> List[str]: + return [] + + +class TestOpenAsFileWrite: + """Tests for write support in open_as_file.""" + + def test_write_local_file(self, tmp_path: Path): + """It should be able to write to a local file.""" + output_path = tmp_path / "output.txt" + with open_as_file(output_path, mode="wb") as fp: + assert fp is not None + fp.write(b"Hello, write!") + + assert output_path.read_bytes() == b"Hello, write!" + + def test_write_compressed_gz(self, tmp_path: Path): + """It should be able to write compressed gzip files.""" + output_path = tmp_path / "output.txt.gz" + with open_as_file(output_path, mode="wb") as fp: + assert fp is not None + fp.write(b"Compressed content") + + # Verify by reading back + with gzip.open(output_path, "rb") as f: + assert f.read() == b"Compressed content" + + def test_write_compressed_bz2(self, tmp_path: Path): + """It should be able to write compressed bz2 files.""" + output_path = tmp_path / "output.txt.bz2" + with open_as_file(output_path, mode="wb") as fp: + assert fp is not None + fp.write(b"BZ2 content") + + with bz2.open(output_path, "rb") as f: + assert f.read() == b"BZ2 content" + + def test_write_compressed_xz(self, tmp_path: Path): + """It should be able to write compressed xz files.""" + output_path = tmp_path / "output.txt.xz" + with open_as_file(output_path, mode="wb") as fp: + assert fp is not None + fp.write(b"XZ content") + + with lzma.open(output_path, "rb") as f: + assert f.read() == b"XZ content" + + def test_write_bytesio(self): + """It should be able to write to BytesIO.""" + buffer = BytesIO() + with open_as_file(buffer, mode="wb") as fp: + assert fp is not None + fp.write(b"In-memory write") + + buffer.seek(0) + assert buffer.read() == b"In-memory write" + + +class TestAdapterWrite: + """Tests for adapter write support.""" + + def test_write_via_adapter(self, monkeypatch): + """It should use adapter's write_from_stream for remote writes.""" + from kloppy.infra.io import adapters + + mock_adapter = MockWriteAdapter() + # Inject our mock adapter + original_adapters = adapters.adapters + monkeypatch.setattr( + adapters, "adapters", [mock_adapter] + original_adapters + ) + + # Write via adapter + with open_as_file("mock://test/file.txt", mode="wb") as fp: + fp.write(b"Adapter write test") + + # Verify data was written to mock storage + assert ( + mock_adapter.written_data["mock://test/file.txt"] + == b"Adapter write test" + ) + + # Verify we can read it back + with open_as_file("mock://test/file.txt") as fp: + assert fp.read() == b"Adapter write test" From 3e1073a88fa75a622e14123a850db94d2110c93f Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 18 Nov 2025 12:08:02 +0100 Subject: [PATCH 05/16] Follow output pattern for SportsCode --- kloppy/_providers/sportscode.py | 5 +++-- kloppy/infra/io/adapters/fsspec.py | 1 - kloppy/infra/serializers/code/base.py | 11 +++++----- kloppy/infra/serializers/code/sportscode.py | 24 ++++++++++++++------- kloppy/tests/test_xml.py | 11 ++++++++-- 5 files changed, 34 insertions(+), 18 deletions(-) diff --git a/kloppy/_providers/sportscode.py b/kloppy/_providers/sportscode.py index 3ce84e3cf..0f7b762cf 100644 --- a/kloppy/_providers/sportscode.py +++ b/kloppy/_providers/sportscode.py @@ -3,6 +3,7 @@ SportsCodeDeserializer, SportsCodeInputs, SportsCodeSerializer, + SportsCodeOutputs, ) from kloppy.io import FileLike, open_as_file @@ -31,6 +32,6 @@ def save(dataset: CodeDataset, output_filename: str) -> None: dataset: The SportsCode dataset to save. output_filename: The output filename. """ - with open(output_filename, "wb") as fp: + with open_as_file(output_filename, "wb") as data_fp: serializer = SportsCodeSerializer() - fp.write(serializer.serialize(dataset)) + serializer.serialize(dataset, outputs=SportsCodeOutputs(data=data_fp)) diff --git a/kloppy/infra/io/adapters/fsspec.py b/kloppy/infra/io/adapters/fsspec.py index e2cfe2569..f97ae7765 100644 --- a/kloppy/infra/io/adapters/fsspec.py +++ b/kloppy/infra/io/adapters/fsspec.py @@ -32,7 +32,6 @@ def _get_filesystem( Get the appropriate fsspec filesystem for the given URL, with caching enabled. """ protocol = self._infer_protocol(url) - if no_cache: return fsspec.filesystem(protocol) diff --git a/kloppy/infra/serializers/code/base.py b/kloppy/infra/serializers/code/base.py index 7c38d9519..8ebe3cf7f 100644 --- a/kloppy/infra/serializers/code/base.py +++ b/kloppy/infra/serializers/code/base.py @@ -4,16 +4,17 @@ from kloppy.domain import CodeDataset -T = TypeVar("T") +T_I = TypeVar("T_I") +T_O = TypeVar("T_O") -class CodeDataDeserializer(ABC, Generic[T]): +class CodeDataDeserializer(ABC, Generic[T_I]): @abstractmethod - def deserialize(self, inputs: T) -> CodeDataset: + def deserialize(self, inputs: T_I) -> CodeDataset: raise NotImplementedError -class CodeDataSerializer(ABC): +class CodeDataSerializer(ABC, Generic[T_O]): @abstractmethod - def serialize(self, dataset: CodeDataset) -> bytes: + def serialize(self, dataset: CodeDataset, outputs: T_O) -> bool: raise NotImplementedError diff --git a/kloppy/infra/serializers/code/sportscode.py b/kloppy/infra/serializers/code/sportscode.py index c4ccb8c10..52bf3bdcf 100644 --- a/kloppy/infra/serializers/code/sportscode.py +++ b/kloppy/infra/serializers/code/sportscode.py @@ -46,6 +46,10 @@ class SportsCodeInputs(NamedTuple): data: IO[bytes] +class SportsCodeOutputs(NamedTuple): + data: IO[bytes] + + class SportsCodeDeserializer(CodeDataDeserializer[SportsCodeInputs]): def deserialize(self, inputs: SportsCodeInputs) -> CodeDataset: all_instances = objectify.fromstring(inputs.data.read()) @@ -89,8 +93,10 @@ def deserialize(self, inputs: SportsCodeInputs) -> CodeDataset: ) -class SportsCodeSerializer(CodeDataSerializer): - def serialize(self, dataset: CodeDataset) -> bytes: +class SportsCodeSerializer(CodeDataSerializer[SportsCodeOutputs]): + def serialize( + self, dataset: CodeDataset, outputs: SportsCodeOutputs + ) -> bool: root = etree.Element("file") all_instances = etree.SubElement(root, "ALL_INSTANCES") for i, code in enumerate(dataset.codes): @@ -139,10 +145,12 @@ def serialize(self, dataset: CodeDataset) -> bytes: text_ = etree.SubElement(label, "text") text_.text = str(text) - return etree.tostring( - root, - pretty_print=True, - xml_declaration=True, - encoding="utf-8", # This might not work with some tools because they expected 'ascii'. - method="xml", + outputs.data.write( + etree.tostring( + root, + pretty_print=True, + xml_declaration=True, + encoding="utf-8", # This might not work with some tools because they expected 'ascii'. + method="xml", + ) ) diff --git a/kloppy/tests/test_xml.py b/kloppy/tests/test_xml.py index bd663285e..4e745f235 100644 --- a/kloppy/tests/test_xml.py +++ b/kloppy/tests/test_xml.py @@ -1,11 +1,15 @@ from datetime import timedelta +from io import BytesIO from pandas import DataFrame from pandas._testing import assert_frame_equal from kloppy import sportscode from kloppy.domain import Period -from kloppy.infra.serializers.code.sportscode import SportsCodeSerializer +from kloppy.infra.serializers.code.sportscode import ( + SportsCodeSerializer, + SportsCodeOutputs, +) class TestXMLCodeTracking: @@ -80,7 +84,10 @@ def test_correct_serialization(self, base_dir): dataset.codes[1].period = dataset.metadata.periods[1] serializer = SportsCodeSerializer() - output = serializer.serialize(dataset) + with BytesIO() as buffer: + serializer.serialize(dataset, SportsCodeOutputs(data=buffer)) + buffer.seek(0) + output = buffer.read() expected_output = """ From f4191050b80adf031e5821464e07b79337770efa Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Thu, 11 Dec 2025 12:19:51 +0100 Subject: [PATCH 06/16] working writer --- kloppy/domain/models/tracking.py | 26 +++++++++++-------- .../infra/serializers/tracking/cdf/helpers.py | 19 +++++++++----- .../serializers/tracking/cdf/serializer.py | 4 ++- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/kloppy/domain/models/tracking.py b/kloppy/domain/models/tracking.py index 106882581..87c1d1868 100644 --- a/kloppy/domain/models/tracking.py +++ b/kloppy/domain/models/tracking.py @@ -176,22 +176,26 @@ def to_cdf( CDFTrackingDataSerializer, CDFOutputs, ) + from kloppy.io import FileLike, open_as_file serializer = CDFTrackingDataSerializer() # TODO: write files but also support non-local files, similar to how open_as_file supports non-local files - # with write_as_file(metadata_output_file) as metadata_fp, \ - # write_as_file(tracking_output_file) as tracking_fp: - - # serializer.serialize( - # dataset=self, - # outputs=CDFOutputs( - # meta_data=metadata_fp, - # tracking_data=tracking_fp - # ), - # additional_metadata=additional_metadata - # ) + # Use open_as_file with mode="wb" for writing + with open_as_file( + metadata_output_file, mode="wb" + ) as metadata_fp, open_as_file( + tracking_output_file, mode="wb" + ) as tracking_fp: + + serializer.serialize( + dataset=self, + outputs=CDFOutputs( + meta_data=metadata_fp, tracking_data=tracking_fp + ), + additional_metadata=additional_metadata, + ) __all__ = ["Frame", "TrackingDataset", "PlayerData"] diff --git a/kloppy/infra/serializers/tracking/cdf/helpers.py b/kloppy/infra/serializers/tracking/cdf/helpers.py index 439bb2659..648170501 100644 --- a/kloppy/infra/serializers/tracking/cdf/helpers.py +++ b/kloppy/infra/serializers/tracking/cdf/helpers.py @@ -1,4 +1,4 @@ -from kloppy.domain import PositionType, Ground +from kloppy.domain import PositionType, Ground, Point3D PERIODS_MAP = { 1: "first_half", @@ -32,12 +32,17 @@ def get_player_coordinates(frame, ground: Ground): def get_ball_coordinates(frame): if frame.ball_coordinates is not None: - return { - "x": round(frame.ball_coordinates.x, 3), - "y": round(frame.ball_coordinates.y, 3), - "z": round(frame.ball_coordinates.z, 3), - } - + if isinstance(frame.ball_coordinates, Point3D): + return { + "x": round(frame.ball_coordinates.x, 3), + "y": round(frame.ball_coordinates.y, 3), + "z": round(frame.ball_coordinates.z, 3), + } + else: + return { + "x": round(frame.ball_coordinates.x, 3), + "y": round(frame.ball_coordinates.y, 3), + } # TODO: set to None after new CDF validator update return {"x": None, "y": None, "z": None} diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 8fa9e55b4..9790d8f99 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -76,7 +76,9 @@ def serialize( Returns: bool: True if serialization was successful """ - if all([True for x in dataset if x.ball_state == BallState.ALIVE]): + if all( + True if x.ball_state == BallState.ALIVE else False for x in dataset + ): warnings.warn( "All frames in 'tracking_dataset' are 'ALIVE', the Common Data Format expects 'DEAD' frames as well. Set `only_alive=False` in your kloppy `.load_tracking()` call to include 'DEAD' frames.", UserWarning, From 7993ec80f5e085057d757330cea93842a79e984f Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 11:20:26 +0100 Subject: [PATCH 07/16] cdf improve write --- .../infra/serializers/tracking/cdf/helpers.py | 67 +++++++++++++++++-- .../serializers/tracking/cdf/serializer.py | 5 +- kloppy/tests/test_cdf.py | 59 +++++++++++++++- 3 files changed, 121 insertions(+), 10 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/helpers.py b/kloppy/infra/serializers/tracking/cdf/helpers.py index 648170501..2673d482c 100644 --- a/kloppy/infra/serializers/tracking/cdf/helpers.py +++ b/kloppy/infra/serializers/tracking/cdf/helpers.py @@ -1,5 +1,7 @@ from kloppy.domain import PositionType, Ground, Point3D +import warnings + PERIODS_MAP = { 1: "first_half", 2: "second_half", @@ -9,6 +11,56 @@ } +def is_valid_cdf_position_code(x): + from cdf.validators.common import POSITION_GROUPS + + return any(x in positions for positions in POSITION_GROUPS.values()) + + +def map_position_type_code_to_cdf(position_code): + """ + Docstring for map_position_type_code_to_cdf + + :param position_code: Description + """ + if is_valid_cdf_position_code(position_code): + return position_code + else: + if position_code == "UNK": + warnings.warn( + f"""position_code '{position_code}' identified within dataset. + \nThis means there is no appropriate mapping from the original position type (as provided by your data provider) to a kloppy.domain.PositionType. + \nTo resolve this, please open an issue at https://github.com/PySport/kloppy/issues""", + UserWarning, + ) + return None + elif position_code in ["DEF", "FB", "MID", "WM", "ATT"]: + warnings.warn( + f"""position_code '{position_code}' identified within dataset. + \nThere is no appropriate mapping for this position to Common Data Format.""", + UserWarning, + ) + return None + elif position_code == "LWB": + return "LB" + elif position_code == "RWB": + return "RB" + elif position_code == "DM": + return "CDM" + elif position_code == "AM": + return "CAM" + elif position_code == "LF": + return "LCF" + elif position_code == "ST": + return "CF" + elif position_code == "RF": + return "RCF" + else: + raise ValueError( + f"position.code '{position_code}' cannot be converted to CDF, because there is no appropriate mapping." + ) + + def extract_team_players(team): """Extract player IDs from a team.""" return [player.player_id for player in team.players] @@ -16,15 +68,18 @@ def extract_team_players(team): def get_player_coordinates(frame, ground: Ground): """Create player data list for a team from frame coordinates.""" + players = [] for player, coordinates in frame.players_coordinates.items(): if player.team.ground == ground: players.append( { - "id": player.player_id, + "id": str(player.player_id), "x": round(coordinates.x, 3), "y": round(coordinates.y, 3), - "position": player.starting_position.code, + "position": map_position_type_code_to_cdf( + player.starting_position.code + ), } ) return players @@ -43,7 +98,7 @@ def get_ball_coordinates(frame): "x": round(frame.ball_coordinates.x, 3), "y": round(frame.ball_coordinates.y, 3), } - # TODO: set to None after new CDF validator update + return {"x": None, "y": None, "z": None} @@ -77,9 +132,9 @@ def update_period_tracking(period_tracking, period_id, original_frame_id): period_tracking["offset"][period_id - 1] + prev_period_length ) - period_tracking["normalized_start_frame_id"][period_id] = ( - period_tracking["offset"][period_id] - ) + period_tracking["normalized_start_frame_id"][ + period_id + ] = period_tracking["offset"][period_id] period_tracking["end_frame_id"][period_id] = original_frame_id diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 9790d8f99..dd79e28af 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -146,12 +146,12 @@ def _serialize_tracking_frames(self, dataset, outputs, period_tracking): "ball_status": ball_status, "teams": { "home": { - "id": self._home_team.team_id, + "id": str(self._home_team.team_id), "players": home_players, "name": self._home_team.name, }, "away": { - "id": self._away_team.team_id, + "id": str(self._away_team.team_id), "players": away_players, "name": self._away_team.name, }, @@ -231,6 +231,7 @@ def _build_default_metadata_structure( "ball": None, "meta": None, "cdf": None, + "event": None, }, } diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 28d49dcaa..7eefab6b2 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -7,12 +7,15 @@ import json import warnings -from kloppy import sportec -from kloppy.domain import TrackingDataset +from kloppy import sportec, skillcorner +from kloppy.domain import TrackingDataset, PositionType from kloppy.infra.serializers.tracking.cdf.serializer import ( CDFTrackingDataSerializer, CDFOutputs, ) +from kloppy.infra.serializers.tracking.cdf.helpers import ( + is_valid_cdf_position_code, +) class TestCDFSerializer: @@ -24,6 +27,14 @@ def raw_data(self, base_dir) -> Path: def meta_data(self, base_dir) -> Path: return base_dir / "files/sportec_meta.xml" + @pytest.fixture + def meta_data_v3(self, base_dir) -> str: + return base_dir / "files/skillcorner_meta_data.json" + + @pytest.fixture + def raw_data_v3(self, base_dir) -> str: + return base_dir / "files/skillcorner_v3_raw_data.jsonl" + @pytest.fixture def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: """Load a small Sportec tracking data snippet for testing CDF serialization.""" @@ -35,6 +46,18 @@ def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: only_alive=False, ) + @pytest.fixture + def test_correct_deserialization_v3( + self, raw_data_v3: Path, meta_data_v3: Path + ): + return skillcorner.load( + meta_data=meta_data_v3, + raw_data=raw_data_v3, + coordinates="skillcorner", + include_empty_frames=True, + only_alive=False, + ) + def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" serializer = CDFTrackingDataSerializer() @@ -285,3 +308,35 @@ def test_serializer_handles_invalid_metadata_types(self, dataset): # Clean up Path(meta_path).unlink() Path(tracking_path).unlink() + + def test_cdf_positions(self): + """ + Make sure we have not introduced any non-cdf supported positions to kloppy PositionType. + If we did, update map_position_type_code_to_cdf + """ + + test_list = [] + + for position in PositionType: + if is_valid_cdf_position_code(position.code): + pass + else: + test_list.append(position.code) + + assert set(test_list) == set( + [ + "UNK", + "DEF", + "FB", + "LWB", + "RWB", + "MID", + "DM", + "AM", + "WM", + "ATT", + "LF", + "ST", + "RF", + ] + ) From 9e00f8e730fd20a6ac3bcacc1588094e0df605c2 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 11:21:12 +0100 Subject: [PATCH 08/16] io --- kloppy/infra/io/adapters/fsspec.py | 8 +++++--- kloppy/infra/io/adapters/zip.py | 8 +++++--- kloppy/infra/io/buffered_stream.py | 6 ++---- setup.py | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/kloppy/infra/io/adapters/fsspec.py b/kloppy/infra/io/adapters/fsspec.py index f97ae7765..26fd46f5e 100644 --- a/kloppy/infra/io/adapters/fsspec.py +++ b/kloppy/infra/io/adapters/fsspec.py @@ -104,9 +104,11 @@ def list_directory(self, url: str, recursive: bool = True) -> List[str]: else: files = fs.listdir(url, detail=False) return [ - f"{protocol}://{fp}" - if protocol != "file" and not fp.startswith(protocol) - else fp + ( + f"{protocol}://{fp}" + if protocol != "file" and not fp.startswith(protocol) + else fp + ) for fp in files ] diff --git a/kloppy/infra/io/adapters/zip.py b/kloppy/infra/io/adapters/zip.py index adbe38312..231eeb8e6 100644 --- a/kloppy/infra/io/adapters/zip.py +++ b/kloppy/infra/io/adapters/zip.py @@ -44,8 +44,10 @@ def list_directory(self, url: str, recursive: bool = True) -> List[str]: else: files = fs.listdir(url, detail=False) return [ - f"{protocol}://{fp}" - if protocol != "file" and not fp.startswith(protocol) - else fp + ( + f"{protocol}://{fp}" + if protocol != "file" and not fp.startswith(protocol) + else fp + ) for fp in files ] diff --git a/kloppy/infra/io/buffered_stream.py b/kloppy/infra/io/buffered_stream.py index d2d65f963..5bc1ba4fa 100644 --- a/kloppy/infra/io/buffered_stream.py +++ b/kloppy/infra/io/buffered_stream.py @@ -10,15 +10,13 @@ class SupportsWrite(Protocol): """Protocol for objects that support write operations.""" - def write(self, data: bytes) -> int: - ... + def write(self, data: bytes) -> int: ... class SupportsRead(Protocol): """Protocol for objects that support read operations.""" - def read(self, n: int) -> bytes: - ... + def read(self, n: int) -> bytes: ... class BufferedStream(tempfile.SpooledTemporaryFile): diff --git a/setup.py b/setup.py index 4a5d85590..c55871c86 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ def setup_package(): "flask", "flask-cors", "pytest-httpserver", - "common-data-format-validator>=0.0.11", + "common-data-format-validator>=0.0.12", ], "development": ["pre-commit==2.6.0"], "query": ["networkx>=2.4,<3"], From 6b904b2d189bb9e92123a8ad995152870176150c Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 11:23:10 +0100 Subject: [PATCH 09/16] improved (now complete) skillcorner position mapping --- kloppy/infra/serializers/tracking/skillcorner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index d7804d7d4..c6b810a22 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -35,7 +35,8 @@ frame_rate = 10 position_types_mapping: Dict[int, PositionType] = { - 1: PositionType.Unknown, + 0: PositionType.Goalkeeper, + 1: PositionType.Unknown, # Does not exist 2: PositionType.CenterBack, # Provider: CB 3: PositionType.LeftCenterBack, # Provider: LCB 4: PositionType.RightCenterBack, # Provider: RCB @@ -52,6 +53,11 @@ 15: PositionType.Striker, # Provider: CF (mapped to Striker) 16: PositionType.RightForward, # Provider: RF 17: PositionType.Unknown, # Provider: SUB (mapped to Unknown) + 18: PositionType.Unknown, # Does not exist + 19: PositionType.LeftBack, + 20: PositionType.RightBack, + 21: PositionType.LeftDefensiveMidfield, + 22: PositionType.RightDefensiveMidfield, } From c731a760d62af8c032681e346701c7047e254c54 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 11:34:30 +0100 Subject: [PATCH 10/16] add import error --- kloppy/infra/serializers/tracking/cdf/helpers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kloppy/infra/serializers/tracking/cdf/helpers.py b/kloppy/infra/serializers/tracking/cdf/helpers.py index 2673d482c..0722e9581 100644 --- a/kloppy/infra/serializers/tracking/cdf/helpers.py +++ b/kloppy/infra/serializers/tracking/cdf/helpers.py @@ -12,7 +12,13 @@ def is_valid_cdf_position_code(x): - from cdf.validators.common import POSITION_GROUPS + try: + from cdf.validators.common import POSITION_GROUPS + except ImportError: + raise ImportError( + "Seems like you don't have common-data-format-validator installed. Please" + " install it using: pip install common-data-format-validator" + ) return any(x in positions for positions in POSITION_GROUPS.values()) From 3100eb561dde1e43b04508c33a4118fd27a5a185 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 13:11:16 +0100 Subject: [PATCH 11/16] improved tests, 2 providers --- .../infra/serializers/tracking/cdf/helpers.py | 8 +- .../serializers/tracking/cdf/serializer.py | 4 +- kloppy/tests/test_cdf.py | 525 +++++++++--------- 3 files changed, 282 insertions(+), 255 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/helpers.py b/kloppy/infra/serializers/tracking/cdf/helpers.py index 0722e9581..feb6ae7c0 100644 --- a/kloppy/infra/serializers/tracking/cdf/helpers.py +++ b/kloppy/infra/serializers/tracking/cdf/helpers.py @@ -202,8 +202,8 @@ def build_periods_info(dataset, period_tracking, home_team, away_team): "end_frame_id": period_tracking["normalized_end_frame_id"][ period.id ], - "left_team_id": home_team.team_id, - "right_team_id": away_team.team_id, + "left_team_id": str(home_team.team_id), + "right_team_id": str(away_team.team_id), } ) return periods_info @@ -258,8 +258,8 @@ def build_team_players_metadata(team, starters): for player in team.players: players.append( { - "id": player.player_id, - "team_id": team.team_id, + "id": str(player.player_id), + "team_id": str(team.team_id), "jersey_number": player.jersey_no, "is_starter": player.player_id in starters, } diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index dd79e28af..7841f60ed 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -208,7 +208,7 @@ def _build_default_metadata_structure( }, "teams": { "home": { - "id": self._home_team.team_id, + "id": str(self._home_team.team_id), "players": build_team_players_metadata( self._home_team, home_starters ), @@ -216,7 +216,7 @@ def _build_default_metadata_structure( "formation": home_formation, }, "away": { - "id": self._away_team.team_id, + "id": str(self._away_team.team_id), "players": build_team_players_metadata( self._away_team, away_starters ), diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 7eefab6b2..7a4e66f8c 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -18,6 +18,254 @@ ) +def produces_valid_cdf_output(dataset): + """Test that CDFTrackingDataSerializer produces valid CDF output.""" + serializer = CDFTrackingDataSerializer() + + # Instantiate Validators + meta_validator = cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + ) + tracking_validator = cdf.TrackingSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + ) + + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: + + # Instantiate the named tuple for outputs + outputs = CDFOutputs(meta_data=meta_file, tracking_data=tracking_file) + + # Serialize the dataset and capture warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + success = serializer.serialize(dataset, outputs) + assert success is True + + # Verify warnings about missing mandatory IDs were raised + missing_id_warnings = [ + warning + for warning in w + if issubclass(warning.category, UserWarning) + and "Missing mandatory ID" in str(warning.message) + ] + + # Should have warnings for competition.id, season.id, and stadium.id + assert len(missing_id_warnings) == 3, ( + f"Expected 3 missing mandatory ID warnings, but got {len(missing_id_warnings)}: " + f"{[str(warning.message) for warning in missing_id_warnings]}" + ) + + # Check specific warnings are present + warning_messages = [ + str(warning.message) for warning in missing_id_warnings + ] + assert any( + "competition.id" in msg for msg in warning_messages + ), "Missing warning for competition.id" + assert any( + "season.id" in msg for msg in warning_messages + ), "Missing warning for season.id" + assert any( + "stadium.id" in msg for msg in warning_messages + ), "Missing warning for stadium.id" + + # Save paths for validation after leaving the block + meta_path = meta_file.name + tracking_path = tracking_file.name + + # Validate metadata + meta_validator.validate_schema(sample=meta_path) + + # Validate tracking data - read and validate each line (frame) in the JSONL file + with open(tracking_path, "r") as f: + frame_count = 0 + for line in f: + if line.strip(): # Skip empty lines + frame_data = json.loads(line) + # Validate each frame against the tracking schema + tracking_validator.validate_schema(sample=frame_data) + frame_count += 1 + + assert frame_count > 0, "No frames were serialized" + + # Clean up + Path(meta_path).unlink() + Path(tracking_path).unlink() + + +def produces_valid_cdf_output_with_additional_metadata(dataset): + """Test that CDFTrackingDataSerializer produces valid CDF output with additional metadata.""" + serializer = CDFTrackingDataSerializer() + + # Instantiate Validators + meta_validator = cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + ) + tracking_validator = cdf.TrackingSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + ) + + # Define additional metadata + additional_metadata = { + "competition": { + "id": "COMP_123", + "name": "Test Competition", + "format": "league_20", + }, + "season": {"id": "SEASON_2024", "name": "2024/25"}, + "stadium": { + "id": "STADIUM_456", + "name": "Test Arena", + "turf": "grass", + }, + "meta": { + "tracking": { + "version": "2.0.0", + "name": "TestTracker", + "fps": 30, + "collection_timing": "live", + } + }, + } + + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: + + # Instantiate the named tuple for outputs + outputs = CDFOutputs(meta_data=meta_file, tracking_data=tracking_file) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + success = serializer.serialize( + dataset, outputs, additional_metadata=additional_metadata + ) + assert success is True + + # Verify no warnings about missing mandatory IDs were raised + missing_id_warnings = [ + warning + for warning in w + if issubclass(warning.category, UserWarning) + and "Missing mandatory ID" in str(warning.message) + ] + assert len(missing_id_warnings) == 0, ( + f"Expected no missing mandatory ID warnings, but got {len(missing_id_warnings)}: " + f"{[str(warning.message) for warning in missing_id_warnings]}" + ) + + # Save paths for validation after leaving the block + meta_path = meta_file.name + tracking_path = tracking_file.name + + # Validate metadata + meta_validator.validate_schema(sample=meta_path) + + # Verify additional metadata was applied correctly + with open(meta_path, "r") as f: + meta_data = json.load(f) + + # Check competition metadata + assert meta_data["competition"]["id"] == "COMP_123" + assert meta_data["competition"]["name"] == "Test Competition" + assert meta_data["competition"]["format"] == "league_20" + + # Check season metadata + assert meta_data["season"]["id"] == "SEASON_2024" + assert meta_data["season"]["name"] == "2024/25" + + # Check stadium metadata + assert meta_data["stadium"]["id"] == "STADIUM_456" + assert meta_data["stadium"]["name"] == "Test Arena" + assert meta_data["stadium"]["turf"] == "grass" + # Verify default values still present + assert "pitch_length" in meta_data["stadium"] + assert "pitch_width" in meta_data["stadium"] + + # Check meta tracking information + assert meta_data["meta"]["tracking"]["version"] == "2.0.0" + assert meta_data["meta"]["tracking"]["name"] == "TestTracker" + assert meta_data["meta"]["tracking"]["fps"] == 30 + assert meta_data["meta"]["tracking"]["collection_timing"] == "live" + + # Validate tracking data - read and validate each line (frame) in the JSONL file + with open(tracking_path, "r") as f: + frame_count = 0 + for line in f: + if line.strip(): # Skip empty lines + frame_data = json.loads(line) + # Validate each frame against the tracking schema + tracking_validator.validate_schema(sample=frame_data) + frame_count += 1 + + assert frame_count > 0, "No frames were serialized" + + # Clean up + Path(meta_path).unlink() + Path(tracking_path).unlink() + + +def serializer_handles_invalid_metadata_types(dataset): + """Test that CDFTrackingDataSerializer handles invalid metadata types gracefully.""" + serializer = CDFTrackingDataSerializer() + + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: + + meta_path = meta_file.name + tracking_path = tracking_file.name + + outputs = CDFOutputs(meta_data=meta_file, tracking_data=tracking_file) + + # Test with invalid metadata types - should still serialize but may fail validation + invalid_metadata = { + "competition": { + "id": 123, # Should be string + }, + "season": { + "id": ["2024"], # Should be string, not list + }, + "stadium": { + "id": None, # Should be string + "pitch_length": "one hundred five", # Should be float/int + }, + "meta": { + "tracking": { + "fps": "25", # Should be int + "version": 1.0, # Should be string + } + }, + } + + # Serialization should succeed (no type checking in serializer) + success = serializer.serialize( + dataset, outputs, additional_metadata=invalid_metadata + ) + assert success is True + + # The file should be created but validation should fail + meta_validator = cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + ) + + # Validation should fail due to type mismatches + with pytest.raises(Exception): # Could be ValidationError or similar + meta_validator.validate_schema(sample=meta_path) + + # Clean up + Path(meta_path).unlink() + Path(tracking_path).unlink() + + class TestCDFSerializer: @pytest.fixture def raw_data(self, base_dir) -> Path: @@ -29,14 +277,16 @@ def meta_data(self, base_dir) -> Path: @pytest.fixture def meta_data_v3(self, base_dir) -> str: - return base_dir / "files/skillcorner_meta_data.json" + return base_dir / "files/skillcorner_v3_meta_data-2.json" @pytest.fixture def raw_data_v3(self, base_dir) -> str: - return base_dir / "files/skillcorner_v3_raw_data.jsonl" + return base_dir / "files/skillcorner_v3_raw_data-2.jsonl" @pytest.fixture - def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: + def dataset_sportec( + self, raw_data: Path, meta_data: Path + ) -> TrackingDataset: """Load a small Sportec tracking data snippet for testing CDF serialization.""" return sportec.load_tracking( raw_data=raw_data, @@ -47,9 +297,7 @@ def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: ) @pytest.fixture - def test_correct_deserialization_v3( - self, raw_data_v3: Path, meta_data_v3: Path - ): + def dataset_skillcorner(self, raw_data_v3: Path, meta_data_v3: Path): return skillcorner.load( meta_data=meta_data_v3, raw_data=raw_data_v3, @@ -58,256 +306,35 @@ def test_correct_deserialization_v3( only_alive=False, ) - def test_produces_valid_cdf_output(self, dataset): - """Test that CDFTrackingDataSerializer produces valid CDF output.""" - serializer = CDFTrackingDataSerializer() - - # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" - ) - tracking_validator = cdf.TrackingSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" - ) - - with tempfile.NamedTemporaryFile( - mode="w+b", suffix=".json", delete=False - ) as meta_file, tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) as tracking_file: - - # Instantiate the named tuple for outputs - outputs = CDFOutputs( - meta_data=meta_file, tracking_data=tracking_file - ) - - # Serialize the dataset and capture warnings - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - success = serializer.serialize(dataset, outputs) - assert success is True - - # Verify warnings about missing mandatory IDs were raised - missing_id_warnings = [ - warning - for warning in w - if issubclass(warning.category, UserWarning) - and "Missing mandatory ID" in str(warning.message) - ] - - # Should have warnings for competition.id, season.id, and stadium.id - assert len(missing_id_warnings) == 3, ( - f"Expected 3 missing mandatory ID warnings, but got {len(missing_id_warnings)}: " - f"{[str(warning.message) for warning in missing_id_warnings]}" - ) - - # Check specific warnings are present - warning_messages = [ - str(warning.message) for warning in missing_id_warnings - ] - assert any( - "competition.id" in msg for msg in warning_messages - ), "Missing warning for competition.id" - assert any( - "season.id" in msg for msg in warning_messages - ), "Missing warning for season.id" - assert any( - "stadium.id" in msg for msg in warning_messages - ), "Missing warning for stadium.id" - - # Save paths for validation after leaving the block - meta_path = meta_file.name - tracking_path = tracking_file.name - - # Validate metadata - meta_validator.validate_schema(sample=meta_path) + def test_produces_valid_cdf_output_sportec(self, dataset_sportec): + produces_valid_cdf_output(dataset=dataset_sportec) - # Validate tracking data - read and validate each line (frame) in the JSONL file - with open(tracking_path, "r") as f: - frame_count = 0 - for line in f: - if line.strip(): # Skip empty lines - frame_data = json.loads(line) - # Validate each frame against the tracking schema - tracking_validator.validate_schema(sample=frame_data) - frame_count += 1 - - assert frame_count > 0, "No frames were serialized" - - # Clean up - Path(meta_path).unlink() - Path(tracking_path).unlink() - - def test_produces_valid_cdf_output_with_additional_metadata(self, dataset): - """Test that CDFTrackingDataSerializer produces valid CDF output with additional metadata.""" - serializer = CDFTrackingDataSerializer() - - # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" - ) - tracking_validator = cdf.TrackingSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + def test_produces_valid_cdf_output_with_additional_metadata_sportec( + self, dataset_sportec + ): + produces_valid_cdf_output_with_additional_metadata( + dataset=dataset_sportec ) - # Define additional metadata - additional_metadata = { - "competition": { - "id": "COMP_123", - "name": "Test Competition", - "format": "league_20", - }, - "season": {"id": "SEASON_2024", "name": "2024/25"}, - "stadium": { - "id": "STADIUM_456", - "name": "Test Arena", - "turf": "grass", - }, - "meta": { - "tracking": { - "version": "2.0.0", - "name": "TestTracker", - "fps": 30, - "collection_timing": "live", - } - }, - } - - with tempfile.NamedTemporaryFile( - mode="w+b", suffix=".json", delete=False - ) as meta_file, tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) as tracking_file: - - # Instantiate the named tuple for outputs - outputs = CDFOutputs( - meta_data=meta_file, tracking_data=tracking_file - ) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - success = serializer.serialize( - dataset, outputs, additional_metadata=additional_metadata - ) - assert success is True - - # Verify no warnings about missing mandatory IDs were raised - missing_id_warnings = [ - warning - for warning in w - if issubclass(warning.category, UserWarning) - and "Missing mandatory ID" in str(warning.message) - ] - assert len(missing_id_warnings) == 0, ( - f"Expected no missing mandatory ID warnings, but got {len(missing_id_warnings)}: " - f"{[str(warning.message) for warning in missing_id_warnings]}" - ) - - # Save paths for validation after leaving the block - meta_path = meta_file.name - tracking_path = tracking_file.name - - # Validate metadata - meta_validator.validate_schema(sample=meta_path) - - # Verify additional metadata was applied correctly - with open(meta_path, "r") as f: - meta_data = json.load(f) - - # Check competition metadata - assert meta_data["competition"]["id"] == "COMP_123" - assert meta_data["competition"]["name"] == "Test Competition" - assert meta_data["competition"]["format"] == "league_20" - - # Check season metadata - assert meta_data["season"]["id"] == "SEASON_2024" - assert meta_data["season"]["name"] == "2024/25" - - # Check stadium metadata - assert meta_data["stadium"]["id"] == "STADIUM_456" - assert meta_data["stadium"]["name"] == "Test Arena" - assert meta_data["stadium"]["turf"] == "grass" - # Verify default values still present - assert "pitch_length" in meta_data["stadium"] - assert "pitch_width" in meta_data["stadium"] - - # Check meta tracking information - assert meta_data["meta"]["tracking"]["version"] == "2.0.0" - assert meta_data["meta"]["tracking"]["name"] == "TestTracker" - assert meta_data["meta"]["tracking"]["fps"] == 30 - assert meta_data["meta"]["tracking"]["collection_timing"] == "live" - - # Validate tracking data - read and validate each line (frame) in the JSONL file - with open(tracking_path, "r") as f: - frame_count = 0 - for line in f: - if line.strip(): # Skip empty lines - frame_data = json.loads(line) - # Validate each frame against the tracking schema - tracking_validator.validate_schema(sample=frame_data) - frame_count += 1 - - assert frame_count > 0, "No frames were serialized" - - # Clean up - Path(meta_path).unlink() - Path(tracking_path).unlink() - - def test_serializer_handles_invalid_metadata_types(self, dataset): - """Test that CDFTrackingDataSerializer handles invalid metadata types gracefully.""" - serializer = CDFTrackingDataSerializer() - - with tempfile.NamedTemporaryFile( - mode="w+b", suffix=".json", delete=False - ) as meta_file, tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) as tracking_file: - - meta_path = meta_file.name - tracking_path = tracking_file.name - - outputs = CDFOutputs( - meta_data=meta_file, tracking_data=tracking_file - ) - - # Test with invalid metadata types - should still serialize but may fail validation - invalid_metadata = { - "competition": { - "id": 123, # Should be string - }, - "season": { - "id": ["2024"], # Should be string, not list - }, - "stadium": { - "id": None, # Should be string - "pitch_length": "one hundred five", # Should be float/int - }, - "meta": { - "tracking": { - "fps": "25", # Should be int - "version": 1.0, # Should be string - } - }, - } + def test_serializer_handles_invalid_metadata_types_sportec( + self, dataset_sportec + ): + serializer_handles_invalid_metadata_types(dataset=dataset_sportec) - # Serialization should succeed (no type checking in serializer) - success = serializer.serialize( - dataset, outputs, additional_metadata=invalid_metadata - ) - assert success is True + def test_produces_valid_cdf_output_skillcorner(self, dataset_skillcorner): + produces_valid_cdf_output(dataset=dataset_skillcorner) - # The file should be created but validation should fail - meta_validator = cdf.MetaSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" + def test_produces_valid_cdf_output_with_additional_metadata_skillcorner( + self, dataset_skillcorner + ): + produces_valid_cdf_output_with_additional_metadata( + dataset=dataset_skillcorner ) - # Validation should fail due to type mismatches - with pytest.raises(Exception): # Could be ValidationError or similar - meta_validator.validate_schema(sample=meta_path) - - # Clean up - Path(meta_path).unlink() - Path(tracking_path).unlink() + def test_serializer_handles_invalid_metadata_types_skillcorner( + self, dataset_skillcorner + ): + serializer_handles_invalid_metadata_types(dataset=dataset_skillcorner) def test_cdf_positions(self): """ From b72f03ec07802d10eeb89642dd793be833c0d316 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 13:11:54 +0100 Subject: [PATCH 12/16] skillcorner additional test files --- .../tests/files/skillcorner_v3_meta_data-2.json | 1 + .../tests/files/skillcorner_v3_raw_data-2.jsonl | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 kloppy/tests/files/skillcorner_v3_meta_data-2.json create mode 100644 kloppy/tests/files/skillcorner_v3_raw_data-2.jsonl diff --git a/kloppy/tests/files/skillcorner_v3_meta_data-2.json b/kloppy/tests/files/skillcorner_v3_meta_data-2.json new file mode 100644 index 000000000..10779035f --- /dev/null +++ b/kloppy/tests/files/skillcorner_v3_meta_data-2.json @@ -0,0 +1 @@ +{"id":1886347,"home_team_score":2,"away_team_score":0,"date_time":"2024-11-30T04:00:00Z","stadium":{"id":3811,"name":"Mount Smart Stadium","city":"Auckland","capacity":25000},"home_team":{"id":4177,"name":"Auckland FC","short_name":"Auckland FC","acronym":"AUC"},"home_team_kit":{"id":14025,"team_id":4177,"season":{"id":95,"start_year":2024,"end_year":2025,"name":"2024/2025"},"name":"Home","jersey_color":"#2800f0","number_color":"#ffffff"},"away_team":{"id":1805,"name":"Newcastle United Jets FC","short_name":"Newcastle","acronym":"NEW"},"away_team_kit":{"id":10376,"team_id":1805,"season":{"id":29,"start_year":2024,"end_year":2024,"name":"2024"},"name":"away","jersey_color":"#ffffff","number_color":"#000000"},"home_team_coach":null,"away_team_coach":null,"home_team_playing_time":{"minutes_tip":31.13,"minutes_otip":22.2},"away_team_playing_time":{"minutes_tip":22.2,"minutes_otip":31.13},"competition_edition":{"id":870,"competition":{"id":61,"area":"AUS","name":"A-League","gender":"male","age_group":"adult"},"season":{"id":95,"start_year":2024,"end_year":2025,"name":"2024/2025"},"name":"AUS - A-League - 2024/2025"},"match_periods":[{"period":1,"name":"period_1","start_frame":10,"end_frame":27790,"duration_frames":27780,"duration_minutes":46.3},{"period":2,"name":"period_2","start_frame":27800,"end_frame":59060,"duration_frames":31260,"duration_minutes":52.1}],"competition_round":{"id":611,"name":"Round 6","round_number":6,"potential_overtime":false},"referees":[],"players":[{"player_role":{"id":15,"position_group":"Center Forward","name":"Center Forward","acronym":"CF"},"start_time":"00:00:00","end_time":"01:25:21","number":10,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":29.55,"minutes_otip":18.76,"start_frame":10,"end_frame":52009,"minutes_played":86.65,"minutes_played_regular_time":86.65},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.34,"minutes_otip":7.73,"start_frame":27800,"end_frame":52009,"minutes_played":40.35}]},"team_player_id":1507965,"team_id":4177,"id":38673,"first_name":"Guillermo Luis","last_name":"May Bartesaghi","short_name":"G. May","birthday":"1998-03-11","trackable_object":39794,"gender":"male"},{"player_role":{"id":20,"position_group":"Full Back","name":"Right Back","acronym":"RB"},"start_time":"00:00:00","end_time":null,"number":17,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507968,"team_id":4177,"id":51713,"first_name":"Callan","last_name":"Elliot","short_name":"C. Elliott","birthday":"1999-07-07","trackable_object":52839,"gender":"male"},{"player_role":{"id":15,"position_group":"Center Forward","name":"Center Forward","acronym":"CF"},"start_time":"00:00:00","end_time":"01:16:37","number":22,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":26.28,"minutes_otip":17.94,"start_frame":10,"end_frame":46769,"minutes_played":77.91,"minutes_played_regular_time":77.91},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":8.07,"minutes_otip":6.91,"start_frame":27800,"end_frame":46769,"minutes_played":31.62}]},"team_player_id":1507963,"team_id":4177,"id":50951,"first_name":"Jake","last_name":"Brimmer","short_name":"J. Brimmer","birthday":"1998-04-03","trackable_object":52077,"gender":"male"},{"player_role":{"id":21,"position_group":"Midfield","name":"Left Defensive Midfield","acronym":"LDM"},"start_time":"00:00:00","end_time":"01:24:58","number":19,"yellow_card":1,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":18.76,"minutes_otip":29.55,"start_frame":10,"end_frame":51779,"minutes_played":86.26,"minutes_played_regular_time":86.26},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":7.73,"minutes_otip":11.34,"start_frame":27800,"end_frame":51779,"minutes_played":39.97}]},"team_player_id":1087119,"team_id":1805,"id":50978,"first_name":"Callum","last_name":"Timmins","short_name":"C. Timmins","birthday":"1999-12-23","trackable_object":52104,"gender":"male"},{"player_role":{"id":19,"position_group":"Full Back","name":"Left Back","acronym":"LB"},"start_time":"00:00:00","end_time":null,"number":15,"yellow_card":0,"red_card":0,"injured":false,"goal":1,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507956,"team_id":4177,"id":133498,"first_name":"Francis","last_name":"De Vries","short_name":"F. De Vries","birthday":"1994-11-28","trackable_object":135053,"gender":"male"},{"player_role":{"id":3,"position_group":"Central Defender","name":"Left Center Back","acronym":"LCB"},"start_time":"00:00:00","end_time":null,"number":4,"yellow_card":1,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507971,"team_id":4177,"id":33697,"first_name":"Nando","last_name":"Pijnaker","short_name":"N. Pijnaker","birthday":"1999-02-25","trackable_object":34805,"gender":"male"},{"player_role":{"id":4,"position_group":"Central Defender","name":"Right Center Back","acronym":"RCB"},"start_time":"00:00:00","end_time":null,"number":23,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507974,"team_id":4177,"id":51667,"first_name":"Daniel","last_name":"Hall","short_name":"D. Hall","birthday":"1999-06-14","trackable_object":52793,"gender":"male"},{"player_role":{"id":7,"position_group":"Midfield","name":"Defensive Midfield","acronym":"DM"},"start_time":"00:00:00","end_time":null,"number":6,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507966,"team_id":4177,"id":14736,"first_name":"Louis","last_name":"Verstraete","short_name":"L. Verstraete","birthday":"1999-05-04","trackable_object":14933,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":28,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1508548,"team_id":1805,"id":966125,"first_name":"Will ","last_name":"Dobson","short_name":"W. Will Dobson","birthday":null,"trackable_object":967688,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":21,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1508547,"team_id":1805,"id":966124,"first_name":"Noah Paul","last_name":"James","short_name":"N. James","birthday":"2001-02-14","trackable_object":967687,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":27,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1229663,"team_id":1805,"id":808965,"first_name":"Nathan","last_name":"Grimaldi","short_name":"N. Grimaldi","birthday":"2001-09-15","trackable_object":810528,"gender":"male"},{"player_role":{"id":12,"position_group":"Wide Attacker","name":"Left Winger","acronym":"LW"},"start_time":"00:00:00","end_time":null,"number":39,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1087109,"team_id":1805,"id":735573,"first_name":"Thomas","last_name":"Aquilina","short_name":"T. Aquilina","birthday":"2001-02-02","trackable_object":737136,"gender":"male"},{"player_role":{"id":0,"position_group":"Other","name":"Goalkeeper","acronym":"GK"},"start_time":"00:00:00","end_time":null,"number":1,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1204873,"team_id":1805,"id":51009,"first_name":"Ryan","last_name":"Scott","short_name":"R. Scott","birthday":"1995-12-18","trackable_object":52135,"gender":"male"},{"player_role":{"id":4,"position_group":"Central Defender","name":"Right Center Back","acronym":"RCB"},"start_time":"00:00:00","end_time":null,"number":4,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1204875,"team_id":1805,"id":176224,"first_name":"Phillip","last_name":"Cancar","short_name":"P. Cancar","birthday":"2001-05-11","trackable_object":177779,"gender":"male"},{"player_role":{"id":19,"position_group":"Full Back","name":"Left Back","acronym":"LB"},"start_time":"00:00:00","end_time":null,"number":33,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1087123,"team_id":1805,"id":735578,"first_name":"Mark","last_name":"Natta","short_name":"M. Natta","birthday":"2002-11-28","trackable_object":737141,"gender":"male"},{"player_role":{"id":22,"position_group":"Midfield","name":"Right Defensive Midfield","acronym":"RDM"},"start_time":"00:00:00","end_time":null,"number":17,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1087114,"team_id":1805,"id":735574,"first_name":"Kosta","last_name":"Grozos","short_name":"K. Grozos","birthday":"2000-08-10","trackable_object":737137,"gender":"male"},{"player_role":{"id":20,"position_group":"Full Back","name":"Right Back","acronym":"RB"},"start_time":"00:00:00","end_time":null,"number":14,"yellow_card":1,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1087111,"team_id":1805,"id":50983,"first_name":"Dane","last_name":"Ingham","short_name":"D. Ingham","birthday":"1999-06-08","trackable_object":52109,"gender":"male"},{"player_role":{"id":3,"position_group":"Central Defender","name":"Left Center Back","acronym":"LCB"},"start_time":"00:00:00","end_time":null,"number":15,"yellow_card":1,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":22.2,"minutes_otip":31.13,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":11.18,"minutes_otip":12.92,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1508538,"team_id":1805,"id":51649,"first_name":"Aleksandar","last_name":"Šušnjar","short_name":"A. Šušnjar","birthday":"1995-08-19","trackable_object":52775,"gender":"male"},{"player_role":{"id":12,"position_group":"Wide Attacker","name":"Left Winger","acronym":"LW"},"start_time":"00:00:00","end_time":null,"number":14,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507975,"team_id":4177,"id":965685,"first_name":"Liam ","last_name":"Gillion","short_name":"L. Gillion","birthday":"2002-10-17","trackable_object":967248,"gender":"male"},{"player_role":{"id":15,"position_group":"Center Forward","name":"Center Forward","acronym":"CF"},"start_time":"01:07:43","end_time":null,"number":13,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":6.28,"minutes_otip":6.56,"start_frame":41430,"end_frame":59059,"minutes_played":29.38,"minutes_played_regular_time":29.38},"by_period":[{"name":"period_2","minutes_tip":6.28,"minutes_otip":6.56,"start_frame":41430,"end_frame":59059,"minutes_played":29.38}]},"team_player_id":1204871,"team_id":1805,"id":795506,"first_name":"Clayton","last_name":"Taylor","short_name":"C. Taylor","birthday":"2004-03-01","trackable_object":797069,"gender":"male"},{"player_role":{"id":11,"position_group":"Midfield","name":"Attacking Midfield","acronym":"AM"},"start_time":"01:24:58","end_time":null,"number":6,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":3.45,"minutes_otip":1.58,"start_frame":51780,"end_frame":59059,"minutes_played":12.13,"minutes_played_regular_time":12.13},"by_period":[{"name":"period_2","minutes_tip":3.45,"minutes_otip":1.58,"start_frame":51780,"end_frame":59059,"minutes_played":12.13}]},"team_player_id":1508543,"team_id":1805,"id":797297,"first_name":"Matthew","last_name":"Scarcella","short_name":"M. Scarcella","birthday":"2004-03-04","trackable_object":798860,"gender":"male"},{"player_role":{"id":11,"position_group":"Midfield","name":"Attacking Midfield","acronym":"AM"},"start_time":"01:00:15","end_time":null,"number":23,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":6.98,"minutes_otip":7.76,"start_frame":36950,"end_frame":59059,"minutes_played":36.85,"minutes_played_regular_time":36.85},"by_period":[{"name":"period_2","minutes_tip":6.98,"minutes_otip":7.76,"start_frame":36950,"end_frame":59059,"minutes_played":36.85}]},"team_player_id":1204874,"team_id":1805,"id":560992,"first_name":"Daniel","last_name":"Wilmering","short_name":"D. Wilmering","birthday":"2000-12-19","trackable_object":562549,"gender":"male"},{"player_role":{"id":21,"position_group":"Midfield","name":"Left Defensive Midfield","acronym":"LDM"},"start_time":"01:24:58","end_time":null,"number":29,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":3.45,"minutes_otip":1.58,"start_frame":51780,"end_frame":59059,"minutes_played":12.13,"minutes_played_regular_time":12.13},"by_period":[{"name":"period_2","minutes_tip":3.45,"minutes_otip":1.58,"start_frame":51780,"end_frame":59059,"minutes_played":12.13}]},"team_player_id":1216248,"team_id":1805,"id":800320,"first_name":"Justin","last_name":"Vidic","short_name":"J. Vidic","birthday":"2004-04-29","trackable_object":801883,"gender":"male"},{"player_role":{"id":13,"position_group":"Wide Attacker","name":"Right Winger","acronym":"RW"},"start_time":"01:09:53","end_time":null,"number":25,"yellow_card":0,"red_card":0,"injured":false,"goal":1,"own_goal":0,"playing_time":{"total":{"minutes_tip":6.49,"minutes_otip":5.75,"start_frame":42730,"end_frame":59059,"minutes_played":27.22,"minutes_played_regular_time":27.22},"by_period":[{"name":"period_2","minutes_tip":6.49,"minutes_otip":5.75,"start_frame":42730,"end_frame":59059,"minutes_played":27.22}]},"team_player_id":1507969,"team_id":4177,"id":43829,"first_name":"Neyder Stiven","last_name":"Moreno Betancur","short_name":"N. Moreno","birthday":"1997-02-09","trackable_object":44955,"gender":"male"},{"player_role":{"id":2,"position_group":"Central Defender","name":"Center Back","acronym":"CB"},"start_time":"01:25:21","end_time":null,"number":5,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":1.58,"minutes_otip":3.45,"start_frame":52010,"end_frame":59059,"minutes_played":11.75,"minutes_played_regular_time":11.75},"by_period":[{"name":"period_2","minutes_tip":1.58,"minutes_otip":3.45,"start_frame":52010,"end_frame":59059,"minutes_played":11.75}]},"team_player_id":1507955,"team_id":4177,"id":31147,"first_name":"Tommy","last_name":"Smith","short_name":"T. Smith","birthday":"1990-03-31","trackable_object":32245,"gender":"male"},{"player_role":{"id":15,"position_group":"Center Forward","name":"Center Forward","acronym":"CF"},"start_time":"01:16:37","end_time":null,"number":9,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":4.85,"minutes_otip":4.27,"start_frame":46770,"end_frame":59059,"minutes_played":20.48,"minutes_played_regular_time":20.48},"by_period":[{"name":"period_2","minutes_tip":4.85,"minutes_otip":4.27,"start_frame":46770,"end_frame":59059,"minutes_played":20.48}]},"team_player_id":1507964,"team_id":4177,"id":163972,"first_name":"Max","last_name":"Mata","short_name":"M. Mata","birthday":"2000-07-10","trackable_object":165527,"gender":"male"},{"player_role":{"id":13,"position_group":"Wide Attacker","name":"Right Winger","acronym":"RW"},"start_time":"00:00:00","end_time":"01:09:53","number":27,"yellow_card":1,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":24.64,"minutes_otip":16.45,"start_frame":10,"end_frame":42729,"minutes_played":71.18,"minutes_played_regular_time":71.18},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":6.43,"minutes_otip":5.42,"start_frame":27800,"end_frame":42729,"minutes_played":24.88}]},"team_player_id":1507961,"team_id":4177,"id":133501,"first_name":"Logan","last_name":"Rogerson","short_name":"L. Rogerson","birthday":"1998-05-28","trackable_object":135056,"gender":"male"},{"player_role":{"id":11,"position_group":"Midfield","name":"Attacking Midfield","acronym":"AM"},"start_time":"00:00:00","end_time":null,"number":28,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507957,"team_id":4177,"id":23418,"first_name":"Luis Felipe","last_name":"Gallegos Leiva","short_name":"F. Gallegos","birthday":"1991-12-03","trackable_object":24342,"gender":"male"},{"player_role":{"id":11,"position_group":"Midfield","name":"Attacking Midfield","acronym":"AM"},"start_time":"00:00:00","end_time":"01:00:15","number":37,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":15.23,"minutes_otip":23.37,"start_frame":10,"end_frame":36949,"minutes_played":61.55,"minutes_played_regular_time":61.55},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":4.2,"minutes_otip":5.16,"start_frame":27800,"end_frame":36949,"minutes_played":15.25}]},"team_player_id":1204872,"team_id":1805,"id":795507,"first_name":"Lachlan","last_name":"Bayliss","short_name":"L. Bayliss","birthday":"2002-07-24","trackable_object":797070,"gender":"male"},{"player_role":{"id":13,"position_group":"Wide Attacker","name":"Right Winger","acronym":"RW"},"start_time":"00:00:00","end_time":"01:24:58","number":7,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":18.76,"minutes_otip":29.55,"start_frame":10,"end_frame":51779,"minutes_played":86.26,"minutes_played_regular_time":86.26},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":7.73,"minutes_otip":11.34,"start_frame":27800,"end_frame":51779,"minutes_played":39.97}]},"team_player_id":1508540,"team_id":1805,"id":795505,"first_name":"Eli","last_name":"Adams","short_name":"E. Adams","birthday":"2002-03-12","trackable_object":797068,"gender":"male"},{"player_role":{"id":15,"position_group":"Center Forward","name":"Center Forward","acronym":"CF"},"start_time":"00:00:00","end_time":"01:07:43","number":22,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":15.92,"minutes_otip":24.57,"start_frame":10,"end_frame":41429,"minutes_played":69.02,"minutes_played_regular_time":69.02},"by_period":[{"name":"period_1","minutes_tip":11.03,"minutes_otip":18.21,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":4.89,"minutes_otip":6.36,"start_frame":27800,"end_frame":41429,"minutes_played":22.72}]},"team_player_id":1508542,"team_id":1805,"id":966120,"first_name":"Benjamin","last_name":"Gibson","short_name":"B. Gibson","birthday":"2003-01-03","trackable_object":967683,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":3,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1507960,"team_id":4177,"id":23928,"first_name":"Scott","last_name":"Galloway","short_name":"S. Galloway","birthday":"1995-04-25","trackable_object":24886,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":1,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1507962,"team_id":4177,"id":14355,"first_name":"Michael","last_name":"Woud","short_name":"M. Woud","birthday":"1999-01-16","trackable_object":14552,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":8,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1507973,"team_id":4177,"id":965684,"first_name":"Luis","last_name":"Toomey","short_name":"L. Toomey","birthday":"2001-07-01","trackable_object":967247,"gender":"male"},{"player_role":{"id":17,"position_group":"Other","name":"Substitute","acronym":"SUB"},"start_time":null,"end_time":null,"number":7,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":null,"by_period":[]},"team_player_id":1507959,"team_id":4177,"id":133495,"first_name":"Cameron Drew","last_name":"Howieson","short_name":"C. Howieson","birthday":"1994-12-22","trackable_object":135050,"gender":"male"},{"player_role":{"id":0,"position_group":"Other","name":"Goalkeeper","acronym":"GK"},"start_time":"00:00:00","end_time":null,"number":12,"yellow_card":0,"red_card":0,"injured":false,"goal":0,"own_goal":0,"playing_time":{"total":{"minutes_tip":31.13,"minutes_otip":22.2,"start_frame":10,"end_frame":59059,"minutes_played":98.4,"minutes_played_regular_time":98.4},"by_period":[{"name":"period_1","minutes_tip":18.21,"minutes_otip":11.03,"start_frame":10,"end_frame":27790,"minutes_played":46.3},{"name":"period_2","minutes_tip":12.92,"minutes_otip":11.18,"start_frame":27800,"end_frame":59059,"minutes_played":52.1}]},"team_player_id":1507970,"team_id":4177,"id":285188,"first_name":"Alex Noah","last_name":"Paulsen","short_name":"A. Paulsen","birthday":"2002-07-04","trackable_object":286745,"gender":"male"}],"status":"closed","home_team_side":["right_to_left","left_to_right"],"ball":{"trackable_object":55},"pitch_length":104,"pitch_width":68} \ No newline at end of file diff --git a/kloppy/tests/files/skillcorner_v3_raw_data-2.jsonl b/kloppy/tests/files/skillcorner_v3_raw_data-2.jsonl new file mode 100644 index 000000000..b5d6c6cf1 --- /dev/null +++ b/kloppy/tests/files/skillcorner_v3_raw_data-2.jsonl @@ -0,0 +1,17 @@ +{"frame": 10, "timestamp": "00:00:00.00", "period": 1, "ball_data": {"x": 0.32, "y": 0.38, "z": 0.13, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -52.52, "y_top_left": 39.0, "x_bottom_left": -23.21, "y_bottom_left": -37.05, "x_bottom_right": 22.76, "y_bottom_right": -36.88, "x_top_right": 50.99, "y_top_right": 39.0}, "player_data": [{"x": -39.63, "y": -0.08, "player_id": 51009, "is_detected": false}, {"x": -19.21, "y": -9.18, "player_id": 176224, "is_detected": true}, {"x": -21.83, "y": 0.47, "player_id": 51649, "is_detected": true}, {"x": -1.16, "y": -32.47, "player_id": 50983, "is_detected": true}, {"x": -18.88, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.41, "y": 7.13, "player_id": 50978, "is_detected": true}, {"x": -9.51, "y": -5.01, "player_id": 735574, "is_detected": true}, {"x": -2.5, "y": 7.27, "player_id": 795507, "is_detected": false}, {"x": -0.78, "y": -20.69, "player_id": 795505, "is_detected": true}, {"x": -1.85, "y": 18.8, "player_id": 735573, "is_detected": true}, {"x": 1.27, "y": 0.88, "player_id": 966120, "is_detected": true}, {"x": 40.47, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 17.85, "y": 5.52, "player_id": 51667, "is_detected": true}, {"x": 16.78, "y": -3.67, "player_id": 33697, "is_detected": true}, {"x": 17.03, "y": 14.69, "player_id": 51713, "is_detected": true}, {"x": 17.55, "y": -13.6, "player_id": 133498, "is_detected": true}, {"x": 11.7, "y": 6.73, "player_id": 14736, "is_detected": true}, {"x": 10.16, "y": -2.12, "player_id": 23418, "is_detected": true}, {"x": 0.91, "y": 18.96, "player_id": 133501, "is_detected": false}, {"x": 7.74, "y": -16.27, "player_id": 965685, "is_detected": true}, {"x": 0.4, "y": -8.28, "player_id": 50951, "is_detected": true}, {"x": 2.67, "y": 9.94, "player_id": 38673, "is_detected": true}]} +{"frame": 11, "timestamp": "00:00:00.10", "period": 1, "ball_data": {"x": 0.54, "y": 0.08, "z": 0.22, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -52.37, "y_top_left": 39.0, "x_bottom_left": -23.18, "y_bottom_left": -36.89, "x_bottom_right": 22.69, "y_bottom_right": -36.7, "x_top_right": 50.74, "y_top_right": 39.0}, "player_data": [{"x": -39.86, "y": -0.13, "player_id": 51009, "is_detected": false}, {"x": -19.23, "y": -9.23, "player_id": 176224, "is_detected": true}, {"x": -21.82, "y": 0.43, "player_id": 51649, "is_detected": true}, {"x": -1.14, "y": -32.57, "player_id": 50983, "is_detected": true}, {"x": -18.98, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.37, "y": 7.13, "player_id": 50978, "is_detected": true}, {"x": -9.48, "y": -5.08, "player_id": 735574, "is_detected": true}, {"x": -2.29, "y": 7.33, "player_id": 795507, "is_detected": false}, {"x": -0.84, "y": -20.66, "player_id": 795505, "is_detected": true}, {"x": -1.82, "y": 18.78, "player_id": 735573, "is_detected": true}, {"x": 1.24, "y": 0.74, "player_id": 966120, "is_detected": true}, {"x": 40.65, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 17.92, "y": 5.42, "player_id": 51667, "is_detected": true}, {"x": 16.81, "y": -3.7, "player_id": 33697, "is_detected": true}, {"x": 17.09, "y": 14.62, "player_id": 51713, "is_detected": true}, {"x": 17.57, "y": -13.63, "player_id": 133498, "is_detected": true}, {"x": 11.58, "y": 6.69, "player_id": 14736, "is_detected": true}, {"x": 10.13, "y": -2.22, "player_id": 23418, "is_detected": true}, {"x": 0.98, "y": 18.84, "player_id": 133501, "is_detected": false}, {"x": 7.78, "y": -16.35, "player_id": 965685, "is_detected": true}, {"x": 0.44, "y": -8.39, "player_id": 50951, "is_detected": true}, {"x": 2.61, "y": 9.91, "player_id": 38673, "is_detected": true}]} +{"frame": 12, "timestamp": "00:00:00.20", "period": 1, "ball_data": {"x": 0.57, "y": -0.07, "z": 0.19, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -52.11, "y_top_left": 39.0, "x_bottom_left": -23.09, "y_bottom_left": -36.74, "x_bottom_right": 22.58, "y_bottom_right": -36.58, "x_top_right": 50.48, "y_top_right": 39.0}, "player_data": [{"x": -40.06, "y": -0.18, "player_id": 51009, "is_detected": false}, {"x": -19.24, "y": -9.27, "player_id": 176224, "is_detected": true}, {"x": -21.81, "y": 0.4, "player_id": 51649, "is_detected": true}, {"x": -1.13, "y": -32.66, "player_id": 50983, "is_detected": true}, {"x": -19.07, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.32, "y": 7.14, "player_id": 50978, "is_detected": true}, {"x": -9.46, "y": -5.15, "player_id": 735574, "is_detected": true}, {"x": -2.09, "y": 7.39, "player_id": 795507, "is_detected": false}, {"x": -0.9, "y": -20.64, "player_id": 795505, "is_detected": true}, {"x": -1.8, "y": 18.77, "player_id": 735573, "is_detected": true}, {"x": 1.22, "y": 0.61, "player_id": 966120, "is_detected": true}, {"x": 40.83, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 17.98, "y": 5.34, "player_id": 51667, "is_detected": true}, {"x": 16.83, "y": -3.72, "player_id": 33697, "is_detected": true}, {"x": 17.13, "y": 14.55, "player_id": 51713, "is_detected": true}, {"x": 17.59, "y": -13.66, "player_id": 133498, "is_detected": true}, {"x": 11.46, "y": 6.66, "player_id": 14736, "is_detected": true}, {"x": 10.09, "y": -2.31, "player_id": 23418, "is_detected": true}, {"x": 1.05, "y": 18.74, "player_id": 133501, "is_detected": false}, {"x": 7.8, "y": -16.43, "player_id": 965685, "is_detected": true}, {"x": 0.48, "y": -8.49, "player_id": 50951, "is_detected": true}, {"x": 2.56, "y": 9.87, "player_id": 38673, "is_detected": true}]} +{"frame": 13, "timestamp": "00:00:00.30", "period": 1, "ball_data": {"x": 0.56, "y": -0.07, "z": 0.14, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -52.02, "y_top_left": 39.0, "x_bottom_left": -23.09, "y_bottom_left": -36.67, "x_bottom_right": 22.54, "y_bottom_right": -36.51, "x_top_right": 50.28, "y_top_right": 39.0}, "player_data": [{"x": -40.24, "y": -0.22, "player_id": 51009, "is_detected": false}, {"x": -19.25, "y": -9.31, "player_id": 176224, "is_detected": true}, {"x": -21.8, "y": 0.36, "player_id": 51649, "is_detected": true}, {"x": -1.12, "y": -32.74, "player_id": 50983, "is_detected": true}, {"x": -19.16, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.28, "y": 7.14, "player_id": 50978, "is_detected": true}, {"x": -9.45, "y": -5.2, "player_id": 735574, "is_detected": true}, {"x": -1.91, "y": 7.45, "player_id": 795507, "is_detected": false}, {"x": -0.95, "y": -20.62, "player_id": 795505, "is_detected": true}, {"x": -1.76, "y": 18.76, "player_id": 735573, "is_detected": true}, {"x": 1.19, "y": 0.51, "player_id": 966120, "is_detected": true}, {"x": 40.98, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 18.04, "y": 5.26, "player_id": 51667, "is_detected": true}, {"x": 16.86, "y": -3.73, "player_id": 33697, "is_detected": true}, {"x": 17.17, "y": 14.48, "player_id": 51713, "is_detected": true}, {"x": 17.6, "y": -13.68, "player_id": 133498, "is_detected": true}, {"x": 11.34, "y": 6.63, "player_id": 14736, "is_detected": true}, {"x": 10.06, "y": -2.39, "player_id": 23418, "is_detected": true}, {"x": 1.11, "y": 18.63, "player_id": 133501, "is_detected": false}, {"x": 7.83, "y": -16.51, "player_id": 965685, "is_detected": true}, {"x": 0.51, "y": -8.57, "player_id": 50951, "is_detected": true}, {"x": 2.5, "y": 9.83, "player_id": 38673, "is_detected": true}]} +{"frame": 14, "timestamp": "00:00:00.40", "period": 1, "ball_data": {"x": 0.59, "y": -0.03, "z": 0.14, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.78, "y_top_left": 39.0, "x_bottom_left": -22.98, "y_bottom_left": -36.59, "x_bottom_right": 22.49, "y_bottom_right": -36.4, "x_top_right": 50.16, "y_top_right": 39.0}, "player_data": [{"x": -40.39, "y": -0.25, "player_id": 51009, "is_detected": false}, {"x": -19.26, "y": -9.36, "player_id": 176224, "is_detected": true}, {"x": -21.78, "y": 0.34, "player_id": 51649, "is_detected": true}, {"x": -1.11, "y": -32.82, "player_id": 50983, "is_detected": true}, {"x": -19.25, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.23, "y": 7.13, "player_id": 50978, "is_detected": true}, {"x": -9.44, "y": -5.24, "player_id": 735574, "is_detected": true}, {"x": -1.75, "y": 7.52, "player_id": 795507, "is_detected": false}, {"x": -1.0, "y": -20.6, "player_id": 795505, "is_detected": true}, {"x": -1.73, "y": 18.75, "player_id": 735573, "is_detected": true}, {"x": 1.17, "y": 0.41, "player_id": 966120, "is_detected": true}, {"x": 41.12, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 18.08, "y": 5.19, "player_id": 51667, "is_detected": true}, {"x": 16.88, "y": -3.74, "player_id": 33697, "is_detected": true}, {"x": 17.19, "y": 14.43, "player_id": 51713, "is_detected": true}, {"x": 17.62, "y": -13.71, "player_id": 133498, "is_detected": true}, {"x": 11.22, "y": 6.61, "player_id": 14736, "is_detected": true}, {"x": 10.02, "y": -2.45, "player_id": 23418, "is_detected": true}, {"x": 1.17, "y": 18.53, "player_id": 133501, "is_detected": false}, {"x": 7.84, "y": -16.57, "player_id": 965685, "is_detected": true}, {"x": 0.53, "y": -8.64, "player_id": 50951, "is_detected": true}, {"x": 2.45, "y": 9.79, "player_id": 38673, "is_detected": true}]} +{"frame": 15, "timestamp": "00:00:00.50", "period": 1, "ball_data": {"x": 0.63, "y": 0.02, "z": 0.14, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.61, "y_top_left": 39.0, "x_bottom_left": -22.99, "y_bottom_left": -36.5, "x_bottom_right": 22.4, "y_bottom_right": -36.42, "x_top_right": 49.91, "y_top_right": 39.0}, "player_data": [{"x": -40.52, "y": -0.28, "player_id": 51009, "is_detected": false}, {"x": -19.27, "y": -9.4, "player_id": 176224, "is_detected": true}, {"x": -21.77, "y": 0.31, "player_id": 51649, "is_detected": true}, {"x": -1.1, "y": -32.88, "player_id": 50983, "is_detected": true}, {"x": -19.33, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.19, "y": 7.12, "player_id": 50978, "is_detected": true}, {"x": -9.43, "y": -5.28, "player_id": 735574, "is_detected": true}, {"x": -1.6, "y": 7.59, "player_id": 795507, "is_detected": false}, {"x": -1.04, "y": -20.59, "player_id": 795505, "is_detected": true}, {"x": -1.7, "y": 18.75, "player_id": 735573, "is_detected": true}, {"x": 1.15, "y": 0.34, "player_id": 966120, "is_detected": true}, {"x": 41.25, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 18.12, "y": 5.13, "player_id": 51667, "is_detected": true}, {"x": 16.9, "y": -3.74, "player_id": 33697, "is_detected": true}, {"x": 17.2, "y": 14.37, "player_id": 51713, "is_detected": true}, {"x": 17.63, "y": -13.73, "player_id": 133498, "is_detected": true}, {"x": 11.11, "y": 6.59, "player_id": 14736, "is_detected": true}, {"x": 9.98, "y": -2.51, "player_id": 23418, "is_detected": true}, {"x": 1.22, "y": 18.44, "player_id": 133501, "is_detected": false}, {"x": 7.85, "y": -16.64, "player_id": 965685, "is_detected": true}, {"x": 0.55, "y": -8.69, "player_id": 50951, "is_detected": true}, {"x": 2.4, "y": 9.75, "player_id": 38673, "is_detected": true}]} +{"frame": 16, "timestamp": "00:00:00.60", "period": 1, "ball_data": {"x": 0.65, "y": 0.03, "z": 0.14, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.55, "y_top_left": 39.0, "x_bottom_left": -22.93, "y_bottom_left": -36.56, "x_bottom_right": 22.38, "y_bottom_right": -36.37, "x_top_right": 49.8, "y_top_right": 39.0}, "player_data": [{"x": -40.63, "y": -0.3, "player_id": 51009, "is_detected": false}, {"x": -19.28, "y": -9.43, "player_id": 176224, "is_detected": true}, {"x": -21.75, "y": 0.29, "player_id": 51649, "is_detected": true}, {"x": -1.09, "y": -32.93, "player_id": 50983, "is_detected": true}, {"x": -19.41, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -7.14, "y": 7.11, "player_id": 50978, "is_detected": true}, {"x": -9.43, "y": -5.3, "player_id": 735574, "is_detected": true}, {"x": -1.47, "y": 7.66, "player_id": 795507, "is_detected": false}, {"x": -1.08, "y": -20.57, "player_id": 795505, "is_detected": true}, {"x": -1.66, "y": 18.75, "player_id": 735573, "is_detected": true}, {"x": 1.13, "y": 0.28, "player_id": 966120, "is_detected": true}, {"x": 41.36, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 18.15, "y": 5.08, "player_id": 51667, "is_detected": true}, {"x": 16.93, "y": -3.74, "player_id": 33697, "is_detected": true}, {"x": 17.2, "y": 14.33, "player_id": 51713, "is_detected": true}, {"x": 17.63, "y": -13.75, "player_id": 133498, "is_detected": true}, {"x": 11.0, "y": 6.58, "player_id": 14736, "is_detected": true}, {"x": 9.93, "y": -2.56, "player_id": 23418, "is_detected": true}, {"x": 1.26, "y": 18.35, "player_id": 133501, "is_detected": false}, {"x": 7.85, "y": -16.69, "player_id": 965685, "is_detected": true}, {"x": 0.56, "y": -8.73, "player_id": 50951, "is_detected": true}, {"x": 2.35, "y": 9.7, "player_id": 38673, "is_detected": true}]} +{"frame": 17, "timestamp": "00:00:00.70", "period": 1, "ball_data": {"x": 0.66, "y": 0.05, "z": 0.14, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.62, "y_top_left": 39.0, "x_bottom_left": -22.93, "y_bottom_left": -36.58, "x_bottom_right": 22.37, "y_bottom_right": -36.37, "x_top_right": 49.81, "y_top_right": 39.0}, "player_data": [{"x": -40.71, "y": -0.31, "player_id": 51009, "is_detected": false}, {"x": -19.28, "y": -9.47, "player_id": 176224, "is_detected": true}, {"x": -21.74, "y": 0.27, "player_id": 51649, "is_detected": true}, {"x": -1.08, "y": -32.98, "player_id": 50983, "is_detected": true}, {"x": -19.48, "y": 15.72, "player_id": 735578, "is_detected": true}, {"x": -7.09, "y": 7.09, "player_id": 50978, "is_detected": true}, {"x": -9.44, "y": -5.31, "player_id": 735574, "is_detected": true}, {"x": -1.35, "y": 7.73, "player_id": 795507, "is_detected": false}, {"x": -1.11, "y": -20.56, "player_id": 795505, "is_detected": true}, {"x": -1.62, "y": 18.76, "player_id": 735573, "is_detected": true}, {"x": 1.11, "y": 0.24, "player_id": 966120, "is_detected": true}, {"x": 41.45, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 18.17, "y": 5.04, "player_id": 51667, "is_detected": true}, {"x": 16.95, "y": -3.73, "player_id": 33697, "is_detected": true}, {"x": 17.19, "y": 14.29, "player_id": 51713, "is_detected": true}, {"x": 17.64, "y": -13.78, "player_id": 133498, "is_detected": true}, {"x": 10.89, "y": 6.56, "player_id": 14736, "is_detected": true}, {"x": 9.89, "y": -2.6, "player_id": 23418, "is_detected": true}, {"x": 1.3, "y": 18.26, "player_id": 133501, "is_detected": false}, {"x": 7.85, "y": -16.74, "player_id": 965685, "is_detected": true}, {"x": 0.56, "y": -8.75, "player_id": 50951, "is_detected": true}, {"x": 2.3, "y": 9.65, "player_id": 38673, "is_detected": true}]} +{"frame": 18, "timestamp": "00:00:00.80", "period": 1, "ball_data": {"x": 0.67, "y": 0.06, "z": 0.15, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.57, "y_top_left": 39.0, "x_bottom_left": -22.89, "y_bottom_left": -36.59, "x_bottom_right": 22.3, "y_bottom_right": -36.39, "x_top_right": 49.71, "y_top_right": 39.0}, "player_data": [{"x": -40.72, "y": -0.29, "player_id": 51009, "is_detected": false}, {"x": -19.27, "y": -9.5, "player_id": 176224, "is_detected": true}, {"x": -21.72, "y": 0.27, "player_id": 51649, "is_detected": true}, {"x": -1.08, "y": -32.99, "player_id": 50983, "is_detected": true}, {"x": -19.57, "y": 15.7, "player_id": 735578, "is_detected": true}, {"x": -7.05, "y": 7.04, "player_id": 50978, "is_detected": true}, {"x": -9.47, "y": -5.3, "player_id": 735574, "is_detected": true}, {"x": -1.3, "y": 7.83, "player_id": 795507, "is_detected": false}, {"x": -1.14, "y": -20.54, "player_id": 795505, "is_detected": true}, {"x": -1.57, "y": 18.81, "player_id": 735573, "is_detected": true}, {"x": 1.11, "y": 0.24, "player_id": 966120, "is_detected": true}, {"x": 41.49, "y": 0.25, "player_id": 285188, "is_detected": false}, {"x": 18.17, "y": 5.02, "player_id": 51667, "is_detected": true}, {"x": 16.94, "y": -3.7, "player_id": 33697, "is_detected": true}, {"x": 17.15, "y": 14.27, "player_id": 51713, "is_detected": true}, {"x": 17.64, "y": -13.78, "player_id": 133498, "is_detected": true}, {"x": 10.77, "y": 6.58, "player_id": 14736, "is_detected": true}, {"x": 9.84, "y": -2.61, "player_id": 23418, "is_detected": true}, {"x": 1.35, "y": 18.22, "player_id": 133501, "is_detected": false}, {"x": 7.81, "y": -16.77, "player_id": 965685, "is_detected": true}, {"x": 0.55, "y": -8.74, "player_id": 50951, "is_detected": true}, {"x": 2.26, "y": 9.61, "player_id": 38673, "is_detected": true}]} +{"frame": 19, "timestamp": "00:00:00.90", "period": 1, "ball_data": {"x": 0.69, "y": 0.01, "z": 0.15, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.48, "y_top_left": 39.0, "x_bottom_left": -22.86, "y_bottom_left": -36.57, "x_bottom_right": 22.32, "y_bottom_right": -36.37, "x_top_right": 49.74, "y_top_right": 39.0}, "player_data": [{"x": -40.73, "y": -0.27, "player_id": 51009, "is_detected": false}, {"x": -19.27, "y": -9.53, "player_id": 176224, "is_detected": true}, {"x": -21.71, "y": 0.25, "player_id": 51649, "is_detected": true}, {"x": -1.09, "y": -33.03, "player_id": 50983, "is_detected": true}, {"x": -19.64, "y": 15.69, "player_id": 735578, "is_detected": true}, {"x": -7.0, "y": 7.0, "player_id": 50978, "is_detected": true}, {"x": -9.5, "y": -5.28, "player_id": 735574, "is_detected": true}, {"x": -1.25, "y": 7.93, "player_id": 795507, "is_detected": false}, {"x": -1.16, "y": -20.55, "player_id": 795505, "is_detected": true}, {"x": -1.52, "y": 18.84, "player_id": 735573, "is_detected": true}, {"x": 1.1, "y": 0.23, "player_id": 966120, "is_detected": true}, {"x": 41.53, "y": 0.25, "player_id": 285188, "is_detected": false}, {"x": 18.17, "y": 5.01, "player_id": 51667, "is_detected": true}, {"x": 16.96, "y": -3.66, "player_id": 33697, "is_detected": true}, {"x": 17.11, "y": 14.25, "player_id": 51713, "is_detected": true}, {"x": 17.64, "y": -13.8, "player_id": 133498, "is_detected": true}, {"x": 10.67, "y": 6.6, "player_id": 14736, "is_detected": true}, {"x": 9.79, "y": -2.61, "player_id": 23418, "is_detected": true}, {"x": 1.37, "y": 18.16, "player_id": 133501, "is_detected": false}, {"x": 7.79, "y": -16.8, "player_id": 965685, "is_detected": true}, {"x": 0.55, "y": -8.73, "player_id": 50951, "is_detected": true}, {"x": 2.22, "y": 9.56, "player_id": 38673, "is_detected": true}]} +{"frame": 20, "timestamp": "00:00:01.00", "period": 1, "ball_data": {"x": 0.69, "y": -0.05, "z": 0.16, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.46, "y_top_left": 39.0, "x_bottom_left": -22.87, "y_bottom_left": -36.57, "x_bottom_right": 22.29, "y_bottom_right": -36.35, "x_top_right": 49.59, "y_top_right": 39.0}, "player_data": [{"x": -40.74, "y": -0.26, "player_id": 51009, "is_detected": false}, {"x": -19.27, "y": -9.57, "player_id": 176224, "is_detected": true}, {"x": -21.7, "y": 0.25, "player_id": 51649, "is_detected": true}, {"x": -1.1, "y": -33.05, "player_id": 50983, "is_detected": true}, {"x": -19.69, "y": 15.69, "player_id": 735578, "is_detected": true}, {"x": -6.94, "y": 6.96, "player_id": 50978, "is_detected": true}, {"x": -9.52, "y": -5.26, "player_id": 735574, "is_detected": true}, {"x": -1.17, "y": 8.01, "player_id": 795507, "is_detected": false}, {"x": -1.18, "y": -20.55, "player_id": 795505, "is_detected": true}, {"x": -1.47, "y": 18.87, "player_id": 735573, "is_detected": true}, {"x": 1.09, "y": 0.24, "player_id": 966120, "is_detected": true}, {"x": 41.58, "y": 0.24, "player_id": 285188, "is_detected": false}, {"x": 18.17, "y": 4.99, "player_id": 51667, "is_detected": true}, {"x": 16.98, "y": -3.63, "player_id": 33697, "is_detected": true}, {"x": 17.06, "y": 14.23, "player_id": 51713, "is_detected": true}, {"x": 17.62, "y": -13.83, "player_id": 133498, "is_detected": true}, {"x": 10.57, "y": 6.59, "player_id": 14736, "is_detected": true}, {"x": 9.74, "y": -2.61, "player_id": 23418, "is_detected": true}, {"x": 1.39, "y": 18.08, "player_id": 133501, "is_detected": false}, {"x": 7.75, "y": -16.82, "player_id": 965685, "is_detected": true}, {"x": 0.54, "y": -8.71, "player_id": 50951, "is_detected": true}, {"x": 2.17, "y": 9.5, "player_id": 38673, "is_detected": true}]} +{"frame": 21, "timestamp": "00:00:01.10", "period": 1, "ball_data": {"x": 0.67, "y": -0.08, "z": 0.16, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.33, "y_top_left": 39.0, "x_bottom_left": -22.86, "y_bottom_left": -36.49, "x_bottom_right": 22.15, "y_bottom_right": -36.31, "x_top_right": 49.26, "y_top_right": 39.0}, "player_data": [{"x": -40.74, "y": -0.26, "player_id": 51009, "is_detected": false}, {"x": -19.27, "y": -9.6, "player_id": 176224, "is_detected": true}, {"x": -21.68, "y": 0.24, "player_id": 51649, "is_detected": true}, {"x": -1.08, "y": -33.06, "player_id": 50983, "is_detected": true}, {"x": -19.73, "y": 15.69, "player_id": 735578, "is_detected": true}, {"x": -6.87, "y": 6.93, "player_id": 50978, "is_detected": true}, {"x": -9.54, "y": -5.23, "player_id": 735574, "is_detected": true}, {"x": -1.1, "y": 8.06, "player_id": 795507, "is_detected": false}, {"x": -1.2, "y": -20.55, "player_id": 795505, "is_detected": true}, {"x": -1.41, "y": 18.88, "player_id": 735573, "is_detected": true}, {"x": 1.08, "y": 0.26, "player_id": 966120, "is_detected": true}, {"x": 41.62, "y": 0.23, "player_id": 285188, "is_detected": false}, {"x": 18.16, "y": 4.97, "player_id": 51667, "is_detected": true}, {"x": 17.02, "y": -3.61, "player_id": 33697, "is_detected": true}, {"x": 17.0, "y": 14.2, "player_id": 51713, "is_detected": true}, {"x": 17.61, "y": -13.85, "player_id": 133498, "is_detected": true}, {"x": 10.47, "y": 6.58, "player_id": 14736, "is_detected": true}, {"x": 9.69, "y": -2.61, "player_id": 23418, "is_detected": true}, {"x": 1.4, "y": 17.98, "player_id": 133501, "is_detected": false}, {"x": 7.72, "y": -16.84, "player_id": 965685, "is_detected": true}, {"x": 0.52, "y": -8.67, "player_id": 50951, "is_detected": true}, {"x": 2.12, "y": 9.43, "player_id": 38673, "is_detected": true}]} +{"frame": 22, "timestamp": "00:00:01.20", "period": 1, "ball_data": {"x": 0.63, "y": -0.06, "z": 0.18, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.28, "y_top_left": 39.0, "x_bottom_left": -22.83, "y_bottom_left": -36.57, "x_bottom_right": 22.2, "y_bottom_right": -36.37, "x_top_right": 49.34, "y_top_right": 39.0}, "player_data": [{"x": -40.74, "y": -0.27, "player_id": 51009, "is_detected": false}, {"x": -19.28, "y": -9.63, "player_id": 176224, "is_detected": true}, {"x": -21.66, "y": 0.24, "player_id": 51649, "is_detected": true}, {"x": -1.04, "y": -33.08, "player_id": 50983, "is_detected": true}, {"x": -19.79, "y": 15.69, "player_id": 735578, "is_detected": true}, {"x": -6.81, "y": 6.91, "player_id": 50978, "is_detected": true}, {"x": -9.55, "y": -5.2, "player_id": 735574, "is_detected": true}, {"x": -1.0, "y": 8.11, "player_id": 795507, "is_detected": false}, {"x": -1.22, "y": -20.55, "player_id": 795505, "is_detected": true}, {"x": -1.37, "y": 18.88, "player_id": 735573, "is_detected": true}, {"x": 1.05, "y": 0.3, "player_id": 966120, "is_detected": true}, {"x": 41.66, "y": 0.21, "player_id": 285188, "is_detected": false}, {"x": 18.15, "y": 4.95, "player_id": 51667, "is_detected": true}, {"x": 17.06, "y": -3.59, "player_id": 33697, "is_detected": true}, {"x": 16.95, "y": 14.18, "player_id": 51713, "is_detected": true}, {"x": 17.6, "y": -13.88, "player_id": 133498, "is_detected": true}, {"x": 10.36, "y": 6.57, "player_id": 14736, "is_detected": true}, {"x": 9.63, "y": -2.62, "player_id": 23418, "is_detected": true}, {"x": 1.41, "y": 17.88, "player_id": 133501, "is_detected": false}, {"x": 7.7, "y": -16.88, "player_id": 965685, "is_detected": true}, {"x": 0.5, "y": -8.62, "player_id": 50951, "is_detected": true}, {"x": 2.07, "y": 9.38, "player_id": 38673, "is_detected": true}]} +{"frame": 23, "timestamp": "00:00:01.30", "period": 1, "ball_data": {"x": 0.57, "y": 0.03, "z": 0.22, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.24, "y_top_left": 39.0, "x_bottom_left": -22.77, "y_bottom_left": -36.53, "x_bottom_right": 22.14, "y_bottom_right": -36.24, "x_top_right": 49.17, "y_top_right": 39.0}, "player_data": [{"x": -40.75, "y": -0.27, "player_id": 51009, "is_detected": false}, {"x": -19.28, "y": -9.64, "player_id": 176224, "is_detected": true}, {"x": -21.65, "y": 0.24, "player_id": 51649, "is_detected": true}, {"x": -1.0, "y": -33.09, "player_id": 50983, "is_detected": true}, {"x": -19.84, "y": 15.7, "player_id": 735578, "is_detected": true}, {"x": -6.76, "y": 6.9, "player_id": 50978, "is_detected": true}, {"x": -9.56, "y": -5.17, "player_id": 735574, "is_detected": true}, {"x": -0.88, "y": 8.14, "player_id": 795507, "is_detected": false}, {"x": -1.24, "y": -20.55, "player_id": 795505, "is_detected": true}, {"x": -1.33, "y": 18.87, "player_id": 735573, "is_detected": true}, {"x": 1.01, "y": 0.33, "player_id": 966120, "is_detected": true}, {"x": 41.7, "y": 0.19, "player_id": 285188, "is_detected": false}, {"x": 18.13, "y": 4.93, "player_id": 51667, "is_detected": true}, {"x": 17.11, "y": -3.57, "player_id": 33697, "is_detected": true}, {"x": 16.89, "y": 14.15, "player_id": 51713, "is_detected": true}, {"x": 17.59, "y": -13.92, "player_id": 133498, "is_detected": true}, {"x": 10.26, "y": 6.55, "player_id": 14736, "is_detected": true}, {"x": 9.56, "y": -2.64, "player_id": 23418, "is_detected": true}, {"x": 1.39, "y": 17.77, "player_id": 133501, "is_detected": true}, {"x": 7.68, "y": -16.92, "player_id": 965685, "is_detected": true}, {"x": 0.47, "y": -8.57, "player_id": 50951, "is_detected": true}, {"x": 2.03, "y": 9.33, "player_id": 38673, "is_detected": true}]} +{"frame": 24, "timestamp": "00:00:01.40", "period": 1, "ball_data": {"x": 0.53, "y": 0.01, "z": 0.28, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.09, "y_top_left": 39.0, "x_bottom_left": -22.75, "y_bottom_left": -36.47, "x_bottom_right": 22.1, "y_bottom_right": -36.28, "x_top_right": 49.11, "y_top_right": 39.0}, "player_data": [{"x": -40.75, "y": -0.28, "player_id": 51009, "is_detected": false}, {"x": -19.29, "y": -9.66, "player_id": 176224, "is_detected": true}, {"x": -21.65, "y": 0.23, "player_id": 51649, "is_detected": true}, {"x": -0.94, "y": -33.1, "player_id": 50983, "is_detected": true}, {"x": -19.89, "y": 15.71, "player_id": 735578, "is_detected": true}, {"x": -6.71, "y": 6.89, "player_id": 50978, "is_detected": true}, {"x": -9.57, "y": -5.14, "player_id": 735574, "is_detected": true}, {"x": -0.76, "y": 8.16, "player_id": 795507, "is_detected": true}, {"x": -1.24, "y": -20.54, "player_id": 795505, "is_detected": true}, {"x": -1.29, "y": 18.85, "player_id": 735573, "is_detected": true}, {"x": 0.96, "y": 0.37, "player_id": 966120, "is_detected": true}, {"x": 41.75, "y": 0.18, "player_id": 285188, "is_detected": false}, {"x": 18.1, "y": 4.91, "player_id": 51667, "is_detected": true}, {"x": 17.16, "y": -3.56, "player_id": 33697, "is_detected": true}, {"x": 16.83, "y": 14.13, "player_id": 51713, "is_detected": true}, {"x": 17.57, "y": -13.96, "player_id": 133498, "is_detected": true}, {"x": 10.16, "y": 6.53, "player_id": 14736, "is_detected": true}, {"x": 9.48, "y": -2.65, "player_id": 23418, "is_detected": true}, {"x": 1.35, "y": 17.66, "player_id": 133501, "is_detected": true}, {"x": 7.66, "y": -16.97, "player_id": 965685, "is_detected": true}, {"x": 0.41, "y": -8.5, "player_id": 50951, "is_detected": true}, {"x": 1.98, "y": 9.28, "player_id": 38673, "is_detected": true}]} +{"frame": 25, "timestamp": "00:00:01.50", "period": 1, "ball_data": {"x": 0.53, "y": -0.17, "z": 0.31, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -51.18, "y_top_left": 39.0, "x_bottom_left": -22.76, "y_bottom_left": -36.51, "x_bottom_right": 22.12, "y_bottom_right": -36.32, "x_top_right": 49.21, "y_top_right": 39.0}, "player_data": [{"x": -40.75, "y": -0.29, "player_id": 51009, "is_detected": false}, {"x": -19.29, "y": -9.66, "player_id": 176224, "is_detected": true}, {"x": -21.65, "y": 0.23, "player_id": 51649, "is_detected": true}, {"x": -0.88, "y": -33.09, "player_id": 50983, "is_detected": true}, {"x": -19.94, "y": 15.72, "player_id": 735578, "is_detected": true}, {"x": -6.66, "y": 6.9, "player_id": 50978, "is_detected": true}, {"x": -9.57, "y": -5.11, "player_id": 735574, "is_detected": true}, {"x": -0.61, "y": 8.16, "player_id": 795507, "is_detected": true}, {"x": -1.23, "y": -20.53, "player_id": 795505, "is_detected": true}, {"x": -1.25, "y": 18.82, "player_id": 735573, "is_detected": true}, {"x": 0.91, "y": 0.41, "player_id": 966120, "is_detected": true}, {"x": 41.8, "y": 0.16, "player_id": 285188, "is_detected": false}, {"x": 18.07, "y": 4.89, "player_id": 51667, "is_detected": true}, {"x": 17.23, "y": -3.56, "player_id": 33697, "is_detected": true}, {"x": 16.76, "y": 14.11, "player_id": 51713, "is_detected": true}, {"x": 17.56, "y": -14.01, "player_id": 133498, "is_detected": true}, {"x": 10.06, "y": 6.51, "player_id": 14736, "is_detected": true}, {"x": 9.39, "y": -2.68, "player_id": 23418, "is_detected": true}, {"x": 1.3, "y": 17.53, "player_id": 133501, "is_detected": true}, {"x": 7.65, "y": -17.02, "player_id": 965685, "is_detected": true}, {"x": 0.34, "y": -8.43, "player_id": 50951, "is_detected": true}, {"x": 1.93, "y": 9.25, "player_id": 38673, "is_detected": true}]} +{"frame": 26, "timestamp": "00:00:01.60", "period": 1, "ball_data": {"x": 0.57, "y": -0.34, "z": 0.29, "is_detected": true}, "possession": {"player_id": null, "group": null}, "image_corners_projection": {"x_top_left": -50.99, "y_top_left": 39.0, "x_bottom_left": -22.7, "y_bottom_left": -36.46, "x_bottom_right": 22.03, "y_bottom_right": -36.28, "x_top_right": 48.99, "y_top_right": 39.0}, "player_data": [{"x": -40.74, "y": -0.29, "player_id": 51009, "is_detected": false}, {"x": -19.3, "y": -9.66, "player_id": 176224, "is_detected": true}, {"x": -21.66, "y": 0.22, "player_id": 51649, "is_detected": true}, {"x": -0.81, "y": -33.08, "player_id": 50983, "is_detected": true}, {"x": -20.0, "y": 15.73, "player_id": 735578, "is_detected": true}, {"x": -6.61, "y": 6.91, "player_id": 50978, "is_detected": true}, {"x": -9.58, "y": -5.08, "player_id": 735574, "is_detected": true}, {"x": -0.46, "y": 8.16, "player_id": 795507, "is_detected": true}, {"x": -1.2, "y": -20.52, "player_id": 795505, "is_detected": true}, {"x": -1.2, "y": 18.79, "player_id": 735573, "is_detected": true}, {"x": 0.84, "y": 0.45, "player_id": 966120, "is_detected": true}, {"x": 41.85, "y": 0.16, "player_id": 285188, "is_detected": false}, {"x": 18.03, "y": 4.86, "player_id": 51667, "is_detected": true}, {"x": 17.29, "y": -3.55, "player_id": 33697, "is_detected": true}, {"x": 16.69, "y": 14.1, "player_id": 51713, "is_detected": true}, {"x": 17.54, "y": -14.06, "player_id": 133498, "is_detected": true}, {"x": 9.95, "y": 6.49, "player_id": 14736, "is_detected": true}, {"x": 9.28, "y": -2.71, "player_id": 23418, "is_detected": true}, {"x": 1.24, "y": 17.41, "player_id": 133501, "is_detected": true}, {"x": 7.64, "y": -17.08, "player_id": 965685, "is_detected": true}, {"x": 0.24, "y": -8.34, "player_id": 50951, "is_detected": true}, {"x": 1.88, "y": 9.23, "player_id": 38673, "is_detected": true}]} \ No newline at end of file From c4055e7c22234993787321df01563336da56fbbc Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 14:38:41 +0100 Subject: [PATCH 13/16] improved meta data --- .../serializers/tracking/cdf/serializer.py | 17 +- kloppy/tests/test_cdf.py | 324 +++++++----------- 2 files changed, 141 insertions(+), 200 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 7841f60ed..ac5d54056 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -11,6 +11,7 @@ Ground, ) from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer +from kloppy import __version__ from .helpers import ( PERIODS_MAP, @@ -169,6 +170,14 @@ def _build_default_metadata_structure( period_tracking, ) -> "CdfMetaDataSchema": """Build default CDF metadata structure from dataset.""" + try: + from cdf import VERSION + except ImportError: + raise ImportError( + "Seems like you don't have common-data-format-validator installed. Please" + " install it using: pip install common-data-format-validator" + ) + first_frame = dataset[0] home_starters, home_formation = get_starters_and_formation( @@ -226,11 +235,15 @@ def _build_default_metadata_structure( }, "meta": { "video": None, - "tracking": None, + "tracking": { + "fps": dataset.metadata.frame_rate, + "name": dataset.metadata.provider.name.lower(), + "converted_by": f"kloppy-cdf-converter-{__version__}", + }, "landmarks": None, "ball": None, "meta": None, - "cdf": None, + "cdf": {"version": VERSION}, "event": None, }, } diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 7a4e66f8c..bb06d6f8c 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -2,10 +2,6 @@ from pathlib import Path import pytest -import cdf - -import json -import warnings from kloppy import sportec, skillcorner from kloppy.domain import TrackingDataset, PositionType @@ -18,201 +14,97 @@ ) -def produces_valid_cdf_output(dataset): +def mimimum_valid_cdf_output( + dataset, meta_data_validator, tracking_data_validator, tmp_path +): """Test that CDFTrackingDataSerializer produces valid CDF output.""" - serializer = CDFTrackingDataSerializer() - - # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" - ) - tracking_validator = cdf.TrackingSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" - ) - - with tempfile.NamedTemporaryFile( - mode="w+b", suffix=".json", delete=False - ) as meta_file, tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) as tracking_file: - - # Instantiate the named tuple for outputs - outputs = CDFOutputs(meta_data=meta_file, tracking_data=tracking_file) - - # Serialize the dataset and capture warnings - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - success = serializer.serialize(dataset, outputs) - assert success is True - - # Verify warnings about missing mandatory IDs were raised - missing_id_warnings = [ - warning - for warning in w - if issubclass(warning.category, UserWarning) - and "Missing mandatory ID" in str(warning.message) - ] - - # Should have warnings for competition.id, season.id, and stadium.id - assert len(missing_id_warnings) == 3, ( - f"Expected 3 missing mandatory ID warnings, but got {len(missing_id_warnings)}: " - f"{[str(warning.message) for warning in missing_id_warnings]}" - ) - - # Check specific warnings are present - warning_messages = [ - str(warning.message) for warning in missing_id_warnings - ] - assert any( - "competition.id" in msg for msg in warning_messages - ), "Missing warning for competition.id" - assert any( - "season.id" in msg for msg in warning_messages - ), "Missing warning for season.id" - assert any( - "stadium.id" in msg for msg in warning_messages - ), "Missing warning for stadium.id" - - # Save paths for validation after leaving the block - meta_path = meta_file.name - tracking_path = tracking_file.name + meta_path = tmp_path / "metadata.json" + tracking_path = tmp_path / "tracking.jsonl" - # Validate metadata - meta_validator.validate_schema(sample=meta_path) - - # Validate tracking data - read and validate each line (frame) in the JSONL file - with open(tracking_path, "r") as f: - frame_count = 0 - for line in f: - if line.strip(): # Skip empty lines - frame_data = json.loads(line) - # Validate each frame against the tracking schema - tracking_validator.validate_schema(sample=frame_data) - frame_count += 1 + with pytest.warns( + UserWarning, + ): + dataset.to_cdf( + metadata_output_file=str(meta_path), + tracking_output_file=str(tracking_path), + additional_metadata={}, + ) - assert frame_count > 0, "No frames were serialized" + with pytest.raises(Exception, match="'version' is a required property"): + meta_data_validator.validate_schema(sample=meta_path) + + dataset.to_cdf( + metadata_output_file=str(meta_path), + tracking_output_file=str(tracking_path), + additional_metadata={ + "competition": dict( + id="61", + ), + "season": dict( + id="95", + ), + "stadium": dict( + id="2914", + ), + "meta": dict( + tracking=dict(version="v3", collection_timing="post_match") + ), + }, + ) - # Clean up - Path(meta_path).unlink() - Path(tracking_path).unlink() + meta_data_validator.validate_schema(sample=meta_path) + tracking_data_validator.validate_schema(sample=tracking_path, limit=None) -def produces_valid_cdf_output_with_additional_metadata(dataset): +def produces_valid_cdf_output_with_additional_metadata( + dataset, meta_data_validator, tracking_data_validator, tmp_path +): """Test that CDFTrackingDataSerializer produces valid CDF output with additional metadata.""" - serializer = CDFTrackingDataSerializer() - # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" - ) - tracking_validator = cdf.TrackingSchemaValidator( - schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + from cdf.domain import ( + CdfMetaDataSchema, + Stadium, + Competition, + Season, + Meta, + Tracking, ) # Define additional metadata - additional_metadata = { - "competition": { - "id": "COMP_123", - "name": "Test Competition", - "format": "league_20", - }, - "season": {"id": "SEASON_2024", "name": "2024/25"}, - "stadium": { - "id": "STADIUM_456", - "name": "Test Arena", - "turf": "grass", - }, - "meta": { - "tracking": { - "version": "2.0.0", - "name": "TestTracker", - "fps": 30, - "collection_timing": "live", - } - }, - } - - with tempfile.NamedTemporaryFile( - mode="w+b", suffix=".json", delete=False - ) as meta_file, tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) as tracking_file: - - # Instantiate the named tuple for outputs - outputs = CDFOutputs(meta_data=meta_file, tracking_data=tracking_file) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - success = serializer.serialize( - dataset, outputs, additional_metadata=additional_metadata - ) - assert success is True - - # Verify no warnings about missing mandatory IDs were raised - missing_id_warnings = [ - warning - for warning in w - if issubclass(warning.category, UserWarning) - and "Missing mandatory ID" in str(warning.message) - ] - assert len(missing_id_warnings) == 0, ( - f"Expected no missing mandatory ID warnings, but got {len(missing_id_warnings)}: " - f"{[str(warning.message) for warning in missing_id_warnings]}" + additional_meta_data = CdfMetaDataSchema( + competition=Competition( + id="61", name="A-League", type="mens", format="league" + ), + season=Season(id="95", name="2024/2025"), + stadium=Stadium( + id="2914", + name="Kayo Stadium", + ), + meta=Meta( + tracking=Tracking( + version="v3", + collection_timing="post_match", ) + ), + ) - # Save paths for validation after leaving the block - meta_path = meta_file.name - tracking_path = tracking_file.name + meta_path = tmp_path / "metadata.json" + tracking_path = tmp_path / "tracking.jsonl" - # Validate metadata - meta_validator.validate_schema(sample=meta_path) - - # Verify additional metadata was applied correctly - with open(meta_path, "r") as f: - meta_data = json.load(f) - - # Check competition metadata - assert meta_data["competition"]["id"] == "COMP_123" - assert meta_data["competition"]["name"] == "Test Competition" - assert meta_data["competition"]["format"] == "league_20" - - # Check season metadata - assert meta_data["season"]["id"] == "SEASON_2024" - assert meta_data["season"]["name"] == "2024/25" - - # Check stadium metadata - assert meta_data["stadium"]["id"] == "STADIUM_456" - assert meta_data["stadium"]["name"] == "Test Arena" - assert meta_data["stadium"]["turf"] == "grass" - # Verify default values still present - assert "pitch_length" in meta_data["stadium"] - assert "pitch_width" in meta_data["stadium"] - - # Check meta tracking information - assert meta_data["meta"]["tracking"]["version"] == "2.0.0" - assert meta_data["meta"]["tracking"]["name"] == "TestTracker" - assert meta_data["meta"]["tracking"]["fps"] == 30 - assert meta_data["meta"]["tracking"]["collection_timing"] == "live" - - # Validate tracking data - read and validate each line (frame) in the JSONL file - with open(tracking_path, "r") as f: - frame_count = 0 - for line in f: - if line.strip(): # Skip empty lines - frame_data = json.loads(line) - # Validate each frame against the tracking schema - tracking_validator.validate_schema(sample=frame_data) - frame_count += 1 - - assert frame_count > 0, "No frames were serialized" + dataset.to_cdf( + metadata_output_file=str(meta_path), + tracking_output_file=str(tracking_path), + additional_metadata=additional_meta_data, + ) - # Clean up - Path(meta_path).unlink() - Path(tracking_path).unlink() + meta_data_validator.validate_schema(sample=meta_path) + tracking_data_validator.validate_schema(sample=tracking_path, limit=None) def serializer_handles_invalid_metadata_types(dataset): """Test that CDFTrackingDataSerializer handles invalid metadata types gracefully.""" + import cdf + serializer = CDFTrackingDataSerializer() with tempfile.NamedTemporaryFile( @@ -306,35 +198,71 @@ def dataset_skillcorner(self, raw_data_v3: Path, meta_data_v3: Path): only_alive=False, ) - def test_produces_valid_cdf_output_sportec(self, dataset_sportec): - produces_valid_cdf_output(dataset=dataset_sportec) + @pytest.fixture + def meta_data_validator(self): + import cdf - def test_produces_valid_cdf_output_with_additional_metadata_sportec( - self, dataset_sportec - ): - produces_valid_cdf_output_with_additional_metadata( - dataset=dataset_sportec + # Instantiate Validators + return cdf.MetaSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json" ) - def test_serializer_handles_invalid_metadata_types_sportec( - self, dataset_sportec - ): - serializer_handles_invalid_metadata_types(dataset=dataset_sportec) + @pytest.fixture + def tracking_data_validator(self): + import cdf - def test_produces_valid_cdf_output_skillcorner(self, dataset_skillcorner): - produces_valid_cdf_output(dataset=dataset_skillcorner) + # Instantiate Validators + return cdf.TrackingSchemaValidator( + schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json" + ) + + def test_produces_valid_cdf_output( + self, + dataset_sportec, + dataset_skillcorner, + tracking_data_validator, + meta_data_validator, + tmp_path, + ): + mimimum_valid_cdf_output( + dataset_sportec, + meta_data_validator, + tracking_data_validator, + tmp_path, + ) + mimimum_valid_cdf_output( + dataset_skillcorner, + meta_data_validator, + tracking_data_validator, + tmp_path, + ) - def test_produces_valid_cdf_output_with_additional_metadata_skillcorner( - self, dataset_skillcorner + def test_produces_valid_cdf_output_with_additional_metadata( + self, + dataset_skillcorner, + dataset_sportec, + tracking_data_validator, + meta_data_validator, + tmp_path, ): produces_valid_cdf_output_with_additional_metadata( - dataset=dataset_skillcorner + dataset_skillcorner, + meta_data_validator, + tracking_data_validator, + tmp_path, + ) + produces_valid_cdf_output_with_additional_metadata( + dataset_sportec, + meta_data_validator, + tracking_data_validator, + tmp_path, ) - def test_serializer_handles_invalid_metadata_types_skillcorner( - self, dataset_skillcorner + def test_serializer_handles_invalid_metadata_types( + self, dataset_skillcorner, dataset_sportec ): serializer_handles_invalid_metadata_types(dataset=dataset_skillcorner) + serializer_handles_invalid_metadata_types(dataset=dataset_sportec) def test_cdf_positions(self): """ From 5ffa0d03ec621a55b496624382f515274b938079 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 16:36:11 +0100 Subject: [PATCH 14/16] remove error, add warning --- .../infra/serializers/tracking/cdf/serializer.py | 14 ++++++++------ kloppy/tests/test_cdf.py | 3 --- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index ac5d54056..c1f5f6cf2 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -37,7 +37,7 @@ import warnings -MISSING_MANDATORY_ID = "MISSING_MANDATORY_ID" +MISSING_MANDATORY_VALUE = "MISSING_MANDATORY_VALUE" class CDFOutputs(NamedTuple): @@ -195,13 +195,13 @@ def _build_default_metadata_structure( return { "competition": { - "id": MISSING_MANDATORY_ID, + "id": MISSING_MANDATORY_VALUE, }, "season": { - "id": MISSING_MANDATORY_ID, + "id": MISSING_MANDATORY_VALUE, }, "stadium": { - "id": MISSING_MANDATORY_ID, + "id": MISSING_MANDATORY_VALUE, "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, }, @@ -239,6 +239,8 @@ def _build_default_metadata_structure( "fps": dataset.metadata.frame_rate, "name": dataset.metadata.provider.name.lower(), "converted_by": f"kloppy-cdf-converter-{__version__}", + "version": MISSING_MANDATORY_VALUE, + "collection_timing": MISSING_MANDATORY_VALUE, }, "landmarks": None, "ball": None, @@ -286,9 +288,9 @@ def _internal_validation_metadata( for key, value in metadata.items(): current_path = f"{path}.{key}" if path else key - if value == MISSING_MANDATORY_ID: + if value == MISSING_MANDATORY_VALUE: warnings.warn( - f"Missing mandatory ID at '{current_path}'. Currently replaced with the value '{MISSING_MANDATORY_ID}'. " + f"Missing mandatory ID at '{current_path}'. Currently replaced with the value '{MISSING_MANDATORY_VALUE}'. " f"Please provide the correct value to 'additional_metadata' to completely adhere to the CDF specification.", UserWarning, ) diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index bb06d6f8c..1731443fa 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -30,9 +30,6 @@ def mimimum_valid_cdf_output( additional_metadata={}, ) - with pytest.raises(Exception, match="'version' is a required property"): - meta_data_validator.validate_schema(sample=meta_path) - dataset.to_cdf( metadata_output_file=str(meta_path), tracking_output_file=str(tracking_path), From 766e2f027c2863e1455d0f399846f21fc5f43b85 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 12 Dec 2025 16:52:05 +0100 Subject: [PATCH 15/16] fix test --- kloppy/tests/test_cdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 1731443fa..ab1ad884b 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -130,7 +130,8 @@ def serializer_handles_invalid_metadata_types(dataset): "meta": { "tracking": { "fps": "25", # Should be int - "version": 1.0, # Should be string + "version": 1.0, # Should be string, + "collection_timing": "Nothing", } }, } From 75da5d6967a954a02c4862c9b539cd8785077785 Mon Sep 17 00:00:00 2001 From: UnravelSports <64530306+UnravelSports@users.noreply.github.com> Date: Mon, 15 Dec 2025 08:42:48 +0000 Subject: [PATCH 16/16] failing tests --- kloppy/infra/io/adapters/fsspec.py | 2 +- .../infra/serializers/tracking/skillcorner.py | 45 +++++-------------- kloppy/io.py | 40 +++++++---------- 3 files changed, 28 insertions(+), 59 deletions(-) diff --git a/kloppy/infra/io/adapters/fsspec.py b/kloppy/infra/io/adapters/fsspec.py index 0c866a93a..3a11db863 100644 --- a/kloppy/infra/io/adapters/fsspec.py +++ b/kloppy/infra/io/adapters/fsspec.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod import re -from typing import BinaryIO, Optional +from typing import BinaryIO, Optional, List import fsspec diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index 72a2538b5..f58cb1482 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta, timezone import json import logging -from typing import IO, NamedTuple, Optional, Union +from typing import IO, NamedTuple, Optional, Union, Dict import warnings from kloppy.domain import ( @@ -101,9 +101,7 @@ def _get_frame_data_v2( only_alive, ): ball_owning_team = cls._get_ball_owning_team(frame["possession"], teams) - ball_state = ( - BallState.ALIVE if ball_owning_team is not None else BallState.DEAD - ) + ball_state = BallState.ALIVE if ball_owning_team is not None else BallState.DEAD if ball_state == BallState.DEAD and only_alive: return None @@ -131,17 +129,12 @@ def _get_frame_data_v2( ball_coordinates = Point3D(x=float(x), y=float(y), z=z) continue - elif ( - trackable_object in referee_dict.keys() - or group_name == "referee" - ): + elif trackable_object in referee_dict.keys() or group_name == "referee": group_name = "referee" continue # Skip Referee Coords if group_name is None: - group_name = teamdict.get( - player_id_to_team_dict.get(trackable_object) - ) + group_name = teamdict.get(player_id_to_team_dict.get(trackable_object)) if group_name == "home_team": player = players["HOME"][trackable_object] @@ -173,9 +166,7 @@ def _get_frame_data_v2( players_data=players_data, period=periods[frame["period"]], ball_state=( - BallState.ALIVE - if ball_owning_team is not None - else BallState.DEAD + BallState.ALIVE if ball_owning_team is not None else BallState.DEAD ), ball_owning_team=ball_owning_team, other_data={}, @@ -199,9 +190,7 @@ def _get_frame_data_v3( only_alive, ): ball_owning_team = cls._get_ball_owning_team(frame["possession"], teams) - ball_state = ( - BallState.ALIVE if ball_owning_team is not None else BallState.DEAD - ) + ball_state = BallState.ALIVE if ball_owning_team is not None else BallState.DEAD if ball_state == BallState.DEAD and only_alive: return None @@ -222,9 +211,7 @@ def _get_frame_data_v3( player = all_players_mapping[str(raw_player_data["player_id"])] player_coordinates = cls._raw_coordinates_to_point(raw_player_data) if player_coordinates: - players_data[player] = PlayerData( - coordinates=player_coordinates - ) + players_data[player] = PlayerData(coordinates=player_coordinates) frame = create_frame( frame_id=frame_id, @@ -355,12 +342,8 @@ def __get_periods(cls, tracking): if _frames: periods[period] = Period( id=period, - start_timestamp=timedelta( - seconds=_frames[0]["frame"] / frame_rate - ), - end_timestamp=timedelta( - seconds=_frames[-1]["frame"] / frame_rate - ), + start_timestamp=timedelta(seconds=_frames[0]["frame"] / frame_rate), + end_timestamp=timedelta(seconds=_frames[-1]["frame"] / frame_rate), ) return periods @@ -419,13 +402,11 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: } player_dict = { - player["trackable_object"]: player - for player in metadata["players"] + player["trackable_object"]: player for player in metadata["players"] } referee_dict = { - ref["trackable_object"]: "referee" - for ref in metadata["referees"] + ref["trackable_object"]: "referee" for ref in metadata["referees"] } ball_id = metadata["ball"]["trackable_object"] @@ -562,9 +543,7 @@ def _iter(): frames.append(frame) n_frames += 1 - if self.limit and n_frames + 1 >= ( - self.limit / self.sample_rate - ): + if self.limit and n_frames + 1 >= (self.limit / self.sample_rate): break attacking_directions = attacking_directions_from_multi_frames( diff --git a/kloppy/io.py b/kloppy/io.py index 8cff4fd24..15f210afa 100644 --- a/kloppy/io.py +++ b/kloppy/io.py @@ -1,12 +1,8 @@ """I/O utilities for reading raw data.""" import bz2 -from collections.abc import Generator, Iterable, Iterator import contextlib -from contextlib import AbstractContextManager -from dataclasses import dataclass, replace import gzip -from io import BufferedWriter, BytesIO, TextIOWrapper import logging import lzma import os @@ -20,7 +16,13 @@ Any, BinaryIO, Callable, + ContextManager, + Generator, + Iterable, + Iterator, + List, Optional, + Tuple, Union, ) @@ -69,7 +71,7 @@ def create(cls, input_: Optional[FileOrPath], **kwargs): def _file_or_path_to_binary_stream( file_or_path: FileOrPath, binary_mode: str -) -> tuple[BinaryIO, bool]: +) -> Tuple[BinaryIO, bool]: """ Converts a file path or a file-like object to a binary stream. @@ -83,9 +85,7 @@ def _file_or_path_to_binary_stream( """ assert binary_mode in ("rb", "wb", "ab") - if isinstance(file_or_path, (str, bytes)) or hasattr( - file_or_path, "__fspath__" - ): + if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"): # If file_or_path is a path-like object, open it and return the binary stream return open(os.fspath(file_or_path), binary_mode), True # type: ignore @@ -178,7 +178,7 @@ def _open( filename: FileOrPath, mode: str = "rb", compresslevel: Optional[int] = None, - format: Optional[str] = None, # noqa: A002 + format: Optional[str] = None, ) -> BinaryIO: """ A replacement for the "open" function that can also read and write @@ -273,9 +273,7 @@ def _open_gz( if "r" in mode: return gzip.open(filename, mode) # type: ignore - return BufferedWriter( - gzip.open(filename, mode, compresslevel=compresslevel) - ) # type: ignore + return BufferedWriter(gzip.open(filename, mode, compresslevel=compresslevel)) # type: ignore def get_file_extension(file_or_path: FileLike) -> str: @@ -302,9 +300,7 @@ def get_file_extension(file_or_path: FileLike) -> str: >>> get_file_extension(Source(data="example.csv")) '.csv' """ - if isinstance(file_or_path, (str, bytes)) or hasattr( - file_or_path, "__fspath__" - ): + if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"): path = os.fspath(file_or_path) # type: ignore for ext in [".gz", ".xz", ".bz2"]: if path.endswith(ext): @@ -325,9 +321,7 @@ def dummy_context_mgr() -> Generator[None, None, None]: @contextlib.contextmanager -def _write_context_manager( - uri: str, mode: str -) -> Generator[BinaryIO, None, None]: +def _write_context_manager(uri: str, mode: str) -> Generator[BinaryIO, None, None]: """ Context manager for write operations that buffers writes and flushes to adapter on exit. @@ -405,9 +399,7 @@ def open_as_file( """ # Validate mode if mode not in ("rb", "wb", "ab"): - raise ValueError( - f"Mode '{mode}' not supported. Use 'rb', 'wb', or 'ab'." - ) + raise ValueError(f"Mode '{mode}' not supported. Use 'rb', 'wb', or 'ab'.") # Handle Source wrapper if isinstance(input_, Source): @@ -430,9 +422,7 @@ def open_as_file( if isinstance(input_, str) and ("{" in input_ or "<" in input_): raise TypeError("Cannot write to inline JSON/XML string.") if isinstance(input_, bytes): - raise TypeError( - "Cannot write to bytes object. Use BytesIO instead." - ) + raise TypeError("Cannot write to bytes object. Use BytesIO instead.") # Read modes: Handle inline data if mode == "rb": @@ -481,7 +471,7 @@ def open_as_file( raise TypeError(f"Unsupported input type: {type(input_)}") -def _natural_sort_key(path: str) -> list[Union[int, str]]: +def _natural_sort_key(path: str) -> List[Union[int, str]]: # Split string into list of chunks for natural sorting return [ int(text) if text.isdigit() else text.lower()