From ca21401c428231745c9645b9f0630c3040623a00 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 19 May 2023 14:16:38 -0700 Subject: [PATCH 01/14] wip --- .../geotag/construct_mp4_parser.py | 18 ++++--- mapillary_tools/geotag/simple_mp4_builder.py | 54 ++++++++++--------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/mapillary_tools/geotag/construct_mp4_parser.py b/mapillary_tools/geotag/construct_mp4_parser.py index 8736f5e22..201a692f5 100644 --- a/mapillary_tools/geotag/construct_mp4_parser.py +++ b/mapillary_tools/geotag/construct_mp4_parser.py @@ -447,12 +447,6 @@ def parse_box(self, data: bytes) -> BoxDict: def parse_boxlist(self, data: bytes) -> T.List[BoxDict]: return T.cast(T.List[BoxDict], self.BoxList.parse(data)) - def build_box(self, box: BoxDict) -> bytes: - return self.Box.build(box) - - def build_boxlist(self, boxes: T.Sequence[BoxDict]) -> bytes: - return self.BoxList.build(boxes) - class Box32ConstructBuilder(Box64ConstructBuilder): """ @@ -473,6 +467,18 @@ def Box(self) -> C.Construct: return self._box + def parse_box(self, data: bytes) -> BoxDict: + raise NotImplementedError("Box32ConstructBuilder does not support parsing") + + def parse_boxlist(self, data: bytes) -> T.List[BoxDict]: + raise NotImplementedError("Box32ConstructBuilder does not support parsing") + + def build_box(self, box: BoxDict) -> bytes: + return self.Box.build(box) + + def build_boxlist(self, boxes: T.Sequence[BoxDict]) -> bytes: + return self.BoxList.build(boxes) + # pyre-ignore[9]: pyre does not support recursive type SwitchMapType CMAP: SwitchMapType = { diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index a718c6a92..424149bc4 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -324,62 +324,68 @@ def transform_mp4( ) -> io_utils.ChainedIO: # extract ftyp src_fp.seek(0) - source_ftyp_box_data = parser.parse_mp4_data_firstx(src_fp, [b"ftyp"]) - source_ftyp_data = cparser.MP4WithoutSTBLBuilderConstruct.build_box( - {"type": b"ftyp", "data": source_ftyp_box_data} - ) + ftyp_data = parser.parse_mp4_data_firstx(src_fp, [b"ftyp"]) # extract moov src_fp.seek(0) - src_moov_data = parser.parse_mp4_data_firstx(src_fp, [b"moov"]) - moov_children = _MOOVChildrenParserConstruct.parse_boxlist(src_moov_data) + moov_data = parser.parse_mp4_data_firstx(src_fp, [b"moov"]) + moov_children = _MOOVChildrenParserConstruct.parse_boxlist(moov_data) # filter tracks in moov moov_children = list(_filter_moov_children_boxes(moov_children)) # extract video samples source_samples = list(iterate_samples(moov_children)) - movie_sample_readers = [ + sample_readers: T.List[io.IOBase] = [ io_utils.SlicedIO(src_fp, sample.offset, sample.size) for sample in source_samples ] if sample_generator is not None: - sample_readers = list(sample_generator(src_fp, moov_children)) - else: - sample_readers = [] + sample_readers.extend(sample_generator(src_fp, moov_children)) _update_all_trak_tkhd(moov_children) - # moov_boxes should be immutable since here + return build_mp4(ftyp_data, moov_children, sample_readers) + + +def build_mp4( + ftyp_data: bytes, + moov_children: T.Sequence[BoxDict], + sample_readers: T.Iterable[io.IOBase], +) -> io_utils.ChainedIO: + ftyp_box = cparser.MP4WithoutSTBLBuilderConstruct.build_box( + {"type": b"ftyp", "data": ftyp_data} + ) mdat_body_size = sum(sample.size for sample in iterate_samples(moov_children)) + # moov_children should be immutable since here + new_moov_box = _rewrite_moov(len(ftyp_box), moov_children) return io_utils.ChainedIO( [ - io.BytesIO(source_ftyp_data), - io.BytesIO(_rewrite_moov(len(source_ftyp_data), moov_children)), + io.BytesIO(ftyp_box), + io.BytesIO(new_moov_box), io.BytesIO(_build_mdat_header_bytes(mdat_body_size)), - *movie_sample_readers, *sample_readers, ] ) -def _rewrite_moov(moov_offset: int, moov_boxes: T.Sequence[BoxDict]) -> bytes: +def _rewrite_moov(moov_offset: int, moov_children: T.Sequence[BoxDict]) -> bytes: # build moov for calculating moov size sample_offset = 0 - for box in _filter_trak_boxes(moov_boxes): + for box in _filter_trak_boxes(moov_children): sample_offset = _update_sbtl(box, sample_offset) - moov_data = _build_moov_bytes(moov_boxes) - moov_data_size = len(moov_data) + moov_bytes = _build_moov_bytes(moov_children) + moov_bytes_size = len(moov_bytes) # mdat header size - mdat_body_size = sum(sample.size for sample in iterate_samples(moov_boxes)) + mdat_body_size = sum(sample.size for sample in iterate_samples(moov_children)) mdat_header = _build_mdat_header_bytes(mdat_body_size) # build moov for real - sample_offset = moov_offset + len(moov_data) + len(mdat_header) - for box in _filter_trak_boxes(moov_boxes): + sample_offset = moov_offset + len(moov_bytes) + len(mdat_header) + for box in _filter_trak_boxes(moov_children): sample_offset = _update_sbtl(box, sample_offset) - moov_data = _build_moov_bytes(moov_boxes) - assert len(moov_data) == moov_data_size, f"{len(moov_data)} != {moov_data_size}" + moov_bytes = _build_moov_bytes(moov_children) + assert len(moov_bytes) == moov_bytes_size, f"{len(moov_bytes)} != {moov_bytes_size}" - return moov_data + return moov_bytes From be2331e6e441ec384aa9e6af4d97977228b5c590 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 3 Aug 2023 11:23:29 -0700 Subject: [PATCH 02/14] more naming refactoring --- mapillary_tools/geotag/simple_mp4_builder.py | 59 +++++++++++++------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index 424149bc4..75c532759 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -11,6 +11,17 @@ from .construct_mp4_parser import BoxDict from .mp4_sample_parser import RawSample +""" +Variable naming conventions: + +- *_box: a BoxDict +- *_boxes: a list of BoxDicts +- *_children: a list of BoxDicts under the parent box +- *_data: the data in bytes of a box (without the header (type and size)) +- *_typed_data: the data in bytes of a box (with the header (type and size)) +""" + + UINT32_MAX = 2**32 - 1 UINT64_MAX = 2**64 - 1 @@ -225,7 +236,7 @@ def _update_all_trak_tkhd(moov_chilren: T.Sequence[BoxDict]) -> None: ) -def _update_sbtl(trak: BoxDict, sample_offset: int) -> int: +def _update_sbtl_sample_offsets(trak: BoxDict, sample_offset: int) -> int: assert trak["type"] == b"trak" # new samples with offsets updated @@ -249,8 +260,7 @@ def _update_sbtl(trak: BoxDict, sample_offset: int) -> int: stbl_children_boxes = build_stbl_from_raw_samples( descriptions, repositioned_samples ) - new_stbl_bytes = _STBLChildrenBuilderConstruct.build_boxlist(stbl_children_boxes) - stbl_box["data"] = new_stbl_bytes + stbl_box["data"] = _STBLChildrenBuilderConstruct.build_boxlist(stbl_children_boxes) return sample_offset @@ -269,7 +279,7 @@ def iterate_samples( yield from raw_samples_iter -def _build_mdat_header_bytes(mdat_size: int) -> bytes: +def _build_mdat_header_data(mdat_size: int) -> bytes: if UINT32_MAX < mdat_size + 8: return cparser.BoxHeader64.build( { @@ -302,7 +312,7 @@ def find_movie_timescale(moov_children: T.Sequence[BoxDict]) -> int: return T.cast(T.Dict, mvhd["data"])["timescale"] -def _build_moov_bytes(moov_children: T.Sequence[BoxDict]) -> bytes: +def _build_moov_typed_data(moov_children: T.Sequence[BoxDict]) -> bytes: return cparser.MP4WithoutSTBLBuilderConstruct.build_box( { "type": b"moov", @@ -353,39 +363,48 @@ def build_mp4( moov_children: T.Sequence[BoxDict], sample_readers: T.Iterable[io.IOBase], ) -> io_utils.ChainedIO: - ftyp_box = cparser.MP4WithoutSTBLBuilderConstruct.build_box( + ftyp_typed_data = cparser.MP4WithoutSTBLBuilderConstruct.build_box( {"type": b"ftyp", "data": ftyp_data} ) mdat_body_size = sum(sample.size for sample in iterate_samples(moov_children)) # moov_children should be immutable since here - new_moov_box = _rewrite_moov(len(ftyp_box), moov_children) + new_moov_typed_data = _rewrite_and_build_moov_typed_data( + len(ftyp_typed_data), moov_children + ) return io_utils.ChainedIO( [ - io.BytesIO(ftyp_box), - io.BytesIO(new_moov_box), - io.BytesIO(_build_mdat_header_bytes(mdat_body_size)), + # ftyp + io.BytesIO(ftyp_typed_data), + # moov + io.BytesIO(new_moov_typed_data), + # mdat + io.BytesIO(_build_mdat_header_data(mdat_body_size)), *sample_readers, ] ) -def _rewrite_moov(moov_offset: int, moov_children: T.Sequence[BoxDict]) -> bytes: +def _rewrite_and_build_moov_typed_data( + moov_offset: int, moov_children: T.Sequence[BoxDict] +) -> bytes: # build moov for calculating moov size sample_offset = 0 for box in _filter_trak_boxes(moov_children): - sample_offset = _update_sbtl(box, sample_offset) - moov_bytes = _build_moov_bytes(moov_children) - moov_bytes_size = len(moov_bytes) + sample_offset = _update_sbtl_sample_offsets(box, sample_offset) + moov_typed_data = _build_moov_typed_data(moov_children) + moov_typed_data_size = len(moov_typed_data) # mdat header size mdat_body_size = sum(sample.size for sample in iterate_samples(moov_children)) - mdat_header = _build_mdat_header_bytes(mdat_body_size) + mdat_header_data = _build_mdat_header_data(mdat_body_size) # build moov for real - sample_offset = moov_offset + len(moov_bytes) + len(mdat_header) + sample_offset = moov_offset + len(moov_typed_data) + len(mdat_header_data) for box in _filter_trak_boxes(moov_children): - sample_offset = _update_sbtl(box, sample_offset) - moov_bytes = _build_moov_bytes(moov_children) - assert len(moov_bytes) == moov_bytes_size, f"{len(moov_bytes)} != {moov_bytes_size}" + sample_offset = _update_sbtl_sample_offsets(box, sample_offset) + moov_typed_data = _build_moov_typed_data(moov_children) + assert ( + len(moov_typed_data) == moov_typed_data_size + ), f"{len(moov_typed_data)} != {moov_typed_data_size}" - return moov_bytes + return moov_typed_data From 50b533949e64aaaedc93aa1b9b51275bb3f6db00 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Aug 2023 16:20:50 -0700 Subject: [PATCH 03/14] simplify Sample and RawSample --- mapillary_tools/geotag/camm_builder.py | 2 +- mapillary_tools/geotag/camm_parser.py | 8 +- mapillary_tools/geotag/gpmf_parser.py | 23 ++-- mapillary_tools/geotag/mp4_sample_parser.py | 132 +++++++------------ mapillary_tools/geotag/simple_mp4_builder.py | 11 +- mapillary_tools/sample_video.py | 10 +- tests/unit/test_mp4_sample_parser.py | 6 +- tests/unit/test_simple_mp4_builder.py | 30 ++--- 8 files changed, 96 insertions(+), 126 deletions(-) diff --git a/mapillary_tools/geotag/camm_builder.py b/mapillary_tools/geotag/camm_builder.py index 5ff61e35b..53740d2c2 100644 --- a/mapillary_tools/geotag/camm_builder.py +++ b/mapillary_tools/geotag/camm_builder.py @@ -104,7 +104,7 @@ def convert_points_to_raw_samples( offset=0, size=len(camm_sample_data), timedelta=timedelta, - composition_offset=0, + composition_timedelta=0, is_sync=True, ) diff --git a/mapillary_tools/geotag/camm_parser.py b/mapillary_tools/geotag/camm_parser.py index 994769d4d..d653452f0 100644 --- a/mapillary_tools/geotag/camm_parser.py +++ b/mapillary_tools/geotag/camm_parser.py @@ -82,12 +82,12 @@ class CAMMType(Enum): def _parse_point_from_sample( fp: T.BinaryIO, sample: sample_parser.Sample ) -> T.Optional[geo.Point]: - fp.seek(sample.offset, io.SEEK_SET) - data = fp.read(sample.size) + fp.seek(sample.raw_sample.offset, io.SEEK_SET) + data = fp.read(sample.raw_sample.size) box = CAMMSampleData.parse(data) if box.type == CAMMType.MIN_GPS.value: return geo.Point( - time=sample.time_offset, + time=sample.exact_time, lat=box.data[0], lon=box.data[1], alt=box.data[2], @@ -97,7 +97,7 @@ def _parse_point_from_sample( # Not using box.data.time_gps_epoch as the point timestamp # because it is from another clock return geo.Point( - time=sample.time_offset, + time=sample.exact_time, lat=box.data.latitude, lon=box.data.longitude, alt=box.data.altitude, diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index c01cf0ba3..f8a75056e 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -257,8 +257,8 @@ def _extract_dvnm_from_samples( dvnm_by_dvid: T.Dict[int, bytes] = {} for sample in samples: - fp.seek(sample.offset, io.SEEK_SET) - data = fp.read(sample.size) + fp.seek(sample.raw_sample.offset, io.SEEK_SET) + data = fp.read(sample.raw_sample.size) gpmf_sample_data = T.cast(T.Dict, GPMFSampleData.parse(data)) # iterate devices @@ -281,8 +281,8 @@ def _extract_points_from_samples( points_by_dvid: T.Dict[int, T.List[geo.PointWithFix]] = {} for sample in samples: - fp.seek(sample.offset, io.SEEK_SET) - data = fp.read(sample.size) + fp.seek(sample.raw_sample.offset, io.SEEK_SET) + data = fp.read(sample.raw_sample.size) gpmf_sample_data = T.cast(T.Dict, GPMFSampleData.parse(data)) # iterate devices @@ -291,9 +291,9 @@ def _extract_points_from_samples( sample_points = _find_first_gps_stream(device["data"]) if sample_points: # interpolate timestamps in between - avg_timedelta = sample.timedelta / len(sample_points) + avg_timedelta = sample.exact_timedelta / len(sample_points) for idx, point in enumerate(sample_points): - point.time = sample.time_offset + avg_timedelta * idx + point.time = sample.exact_time + avg_timedelta * idx device_id = _find_first_device_id(device["data"]) device_points = points_by_dvid.setdefault(device_id, []) @@ -340,10 +340,9 @@ def _extract_gpmd_samples_from_trak( if gpmd_descriptions: s.seek(trak_start_offset, io.SEEK_SET) samples = sample_parser.parse_samples_from_trak(s, maxsize=maxsize) - gpmd_samples = ( - sample for sample in samples if sample.description["format"] == b"gpmd" - ) - yield from gpmd_samples + for sample in samples: + if sample.description["format"] == b"gpmd": + yield sample def extract_all_device_names(fp: T.BinaryIO) -> T.Dict[int, bytes]: @@ -398,6 +397,6 @@ def iterate_gpmd_sample_data(fp: T.BinaryIO) -> T.Generator[T.Dict, None, None]: for h, s in parser.parse_path(fp, [b"moov", b"trak"]): gpmd_samples = _extract_gpmd_samples_from_trak(s, h.maxsize) for sample in gpmd_samples: - fp.seek(sample.offset, io.SEEK_SET) - data = fp.read(sample.size) + fp.seek(sample.raw_sample.offset, io.SEEK_SET) + data = fp.read(sample.raw_sample.size) yield T.cast(T.Dict, GPMFSampleData.parse(data)) diff --git a/mapillary_tools/geotag/mp4_sample_parser.py b/mapillary_tools/geotag/mp4_sample_parser.py index 4c90e0a30..fa8d52422 100644 --- a/mapillary_tools/geotag/mp4_sample_parser.py +++ b/mapillary_tools/geotag/mp4_sample_parser.py @@ -9,47 +9,39 @@ class RawSample(T.NamedTuple): # 1-based index description_idx: int - # sample offset + + # sample offset (offset from the beginning of the file) offset: int - # sample size + + # sample size (in bytes) size: int - # sample_delta read from stts entries, + + # sample_delta read from stts entries that decides when to decode the sample, # i.e. STTS(n) in the forumula DT(n+1) = DT(n) + STTS(n) + # NOTE: timescale is not applied yet (hence int) timedelta: int - # sample composition offset, + + # sample composition offset that decides when to present the sample, # i.e. CTTS(n) in the forumula CT(n) = DT(n) + CTTS(n). - composition_offset: int + # NOTE: timescale is not applied yet (hence int) + composition_timedelta: int + # if it is a sync sample is_sync: bool -# TODO: can not inherit RawSample? class Sample(T.NamedTuple): - # copied from RawSample + raw_sample: RawSample - # 1-based index - description_idx: int - # sample offset - offset: int - # sample size - size: int - # sample delta in seconds read from stts entries, - # i.e. (STTS(n) / timescale) in the forumula DT(n+1) = DT(n) + STTS(n) - timedelta: float - # sample composition offset in seconds, - # i.e. (CTTS(n) / timescale) in the forumula CT(n) = DT(n) + CTTS(n). - composition_offset: float - # if it is a sync sample - is_sync: bool + # accumulated timedelta in seconds, i.e. DT(n) / timescale + exact_time: float - # extended fields below + # accumulated composition timedelta in seconds, i.e. CT(n) / timescale + exact_composition_time: float + + # exact timedelta in seconds, i.e. STTS(n) / timescale + exact_timedelta: float - # accumulated sample_delta in seconds, - # i.e. (DT(n) / timescale) in the forumula DT(n+1) = DT(n) + STTS(n) - time_offset: T.Union[int, float] - # accumulated composition offset in seconds, - # i.e. (CT(n) / timescale) in the forumula CT(n) = DT(n) + CTTS(n). - composition_time_offset: T.Union[int, float] # reference to the sample description description: T.Dict @@ -59,7 +51,7 @@ def _extract_raw_samples( chunk_entries: T.Sequence[T.Dict], chunk_offsets: T.Sequence[int], timedeltas: T.Sequence[int], - composition_offsets: T.Optional[T.Sequence[int]], + composition_timedeltas: T.Optional[T.Sequence[int]], syncs: T.Optional[T.Set[int]], ) -> T.Generator[RawSample, None, None]: if not sizes: @@ -90,9 +82,9 @@ def _extract_raw_samples( # iterate samples in this chunk for _ in range(entry["samples_per_chunk"]): is_sync = syncs is None or (sample_idx + 1) in syncs - composition_offset = ( - composition_offsets[sample_idx] - if composition_offsets is not None + composition_timedelta = ( + composition_timedeltas[sample_idx] + if composition_timedeltas is not None else 0 ) yield RawSample( @@ -100,7 +92,7 @@ def _extract_raw_samples( offset=sample_offset, size=sizes[sample_idx], timedelta=timedeltas[sample_idx], - composition_offset=composition_offset, + composition_timedelta=composition_timedelta, is_sync=is_sync, ) sample_offset += sizes[sample_idx] @@ -117,9 +109,9 @@ def _extract_raw_samples( # iterate samples in this chunk for _ in range(chunk_entries[-1]["samples_per_chunk"]): is_sync = syncs is None or (sample_idx + 1) in syncs - composition_offset = ( - composition_offsets[sample_idx] - if composition_offsets is not None + composition_timedelta = ( + composition_timedeltas[sample_idx] + if composition_timedeltas is not None else 0 ) yield RawSample( @@ -127,7 +119,7 @@ def _extract_raw_samples( offset=sample_offset, size=sizes[sample_idx], timedelta=timedeltas[sample_idx], - composition_offset=composition_offset, + composition_timedelta=composition_timedelta, is_sync=is_sync, ) sample_offset += sizes[sample_idx] @@ -138,38 +130,22 @@ def _extract_raw_samples( def _extract_samples( raw_samples: T.Iterator[RawSample], descriptions: T.List, + timescale: int, ) -> T.Generator[Sample, None, None]: acc_delta = 0 for raw_sample in raw_samples: yield Sample( - description_idx=raw_sample.description_idx, - offset=raw_sample.offset, - size=raw_sample.size, - timedelta=raw_sample.timedelta, - composition_offset=raw_sample.composition_offset, - is_sync=raw_sample.is_sync, + raw_sample=raw_sample, description=descriptions[raw_sample.description_idx - 1], - time_offset=acc_delta, + exact_time=acc_delta / timescale, + exact_timedelta=raw_sample.timedelta / timescale, # CT(n) = DT(n) + CTTS(n) - composition_time_offset=(acc_delta + raw_sample.composition_offset), + exact_composition_time=(acc_delta + raw_sample.composition_timedelta) + / timescale, ) acc_delta += raw_sample.timedelta -def _apply_timescale(sample: Sample, media_timescale: int) -> Sample: - return Sample( - description_idx=sample.description_idx, - offset=sample.offset, - size=sample.size, - timedelta=sample.timedelta / media_timescale, - composition_offset=sample.composition_offset / media_timescale, - is_sync=sample.is_sync, - description=sample.description, - time_offset=sample.time_offset / media_timescale, - composition_time_offset=sample.composition_time_offset / media_timescale, - ) - - def parse_raw_samples_from_stbl( stbl: T.BinaryIO, maxsize: int = -1, @@ -183,7 +159,7 @@ def parse_raw_samples_from_stbl( chunk_offsets = [] chunk_entries = [] timedeltas: T.List[int] = [] - composition_offsets: T.Optional[T.List[int]] = None + composition_timedeltas: T.Optional[T.List[int]] = None syncs: T.Optional[T.Set[int]] = None for h, s in parser.parse_boxes(stbl, maxsize=maxsize, extend_eof=False): @@ -212,11 +188,11 @@ def parse_raw_samples_from_stbl( for _ in range(entry.sample_count): timedeltas.append(entry.sample_delta) elif h.type == b"ctts": - composition_offsets = [] + composition_timedeltas = [] box = cparser.CompositionTimeToSampleBox.parse(s.read(h.maxsize)) for entry in box.entries: for _ in range(entry.sample_count): - composition_offsets.append(entry.sample_offset) + composition_timedeltas.append(entry.sample_offset) elif h.type == b"stss": box = cparser.SyncSampleBox.parse(s.read(h.maxsize)) syncs = set(box.entries) @@ -225,12 +201,12 @@ def parse_raw_samples_from_stbl( # in this case append 0's to timedeltas while len(timedeltas) < len(sizes): timedeltas.append(0) - if composition_offsets is not None: - while len(composition_offsets) < len(sizes): - composition_offsets.append(0) + if composition_timedeltas is not None: + while len(composition_timedeltas) < len(sizes): + composition_timedeltas.append(0) raw_samples = _extract_raw_samples( - sizes, chunk_entries, chunk_offsets, timedeltas, composition_offsets, syncs + sizes, chunk_entries, chunk_offsets, timedeltas, composition_timedeltas, syncs ) return descriptions, raw_samples @@ -248,7 +224,7 @@ def parse_raw_samples_from_stbl_bytes( chunk_offsets = [] chunk_entries = [] timedeltas: T.List[int] = [] - composition_offsets: T.Optional[T.List[int]] = None + composition_timedeltas: T.Optional[T.List[int]] = None syncs: T.Optional[T.Set[int]] = None stbl_boxes = T.cast(T.Sequence[cparser.BoxDict], STBLBoxlistConstruct.parse(stbl)) @@ -275,10 +251,10 @@ def parse_raw_samples_from_stbl_bytes( for _ in range(entry["sample_count"]): timedeltas.append(entry["sample_delta"]) elif box["type"] == b"ctts": - composition_offsets = [] + composition_timedeltas = [] for entry in data["entries"]: for _ in range(entry["sample_count"]): - composition_offsets.append(entry["sample_offset"]) + composition_timedeltas.append(entry["sample_offset"]) elif box["type"] == b"stss": syncs = set(data["entries"]) @@ -286,12 +262,12 @@ def parse_raw_samples_from_stbl_bytes( # in this case append 0's to timedeltas while len(timedeltas) < len(sizes): timedeltas.append(0) - if composition_offsets is not None: - while len(composition_offsets) < len(sizes): - composition_offsets.append(0) + if composition_timedeltas is not None: + while len(composition_timedeltas) < len(sizes): + composition_timedeltas.append(0) raw_samples = _extract_raw_samples( - sizes, chunk_entries, chunk_offsets, timedeltas, composition_offsets, syncs + sizes, chunk_entries, chunk_offsets, timedeltas, composition_timedeltas, syncs ) return descriptions, raw_samples @@ -322,10 +298,7 @@ def parse_samples_from_trak( ) descriptions, raw_samples = parse_raw_samples_from_stbl(s, maxsize=h.maxsize) - yield from ( - _apply_timescale(s, mdhd["timescale"]) - for s in _extract_samples(raw_samples, descriptions) - ) + yield from _extract_samples(raw_samples, descriptions, mdhd["timescale"]) STSDBoxListConstruct = cparser.Box64ConstructBuilder( @@ -369,10 +342,7 @@ def parse_samples(self) -> T.Generator[Sample, None, None]: T.Dict, cparser.find_box_at_pathx(self.trak_boxes, [b"mdia", b"mdhd"])["data"], ) - yield from ( - _apply_timescale(s, mdhd["timescale"]) - for s in _extract_samples(raw_samples, descriptions) - ) + yield from _extract_samples(raw_samples, descriptions, mdhd["timescale"]) class MovieBoxParser: diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index 75c532759..5a5ece821 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -139,14 +139,15 @@ def _build_stts(sample_deltas: T.Iterable[int]) -> BoxDict: class _CompressedSampleCompositionOffset: __slots__ = ("sample_count", "sample_offset") # make sure dataclasses.asdict() produce the result as CompositionTimeToSampleBox expects + # SO DO NOT RENAME THE PROPERTIES BELOW sample_count: int sample_offset: int -def _build_ctts(sample_composition_offsets: T.Iterable[int]) -> BoxDict: +def _build_ctts(sample_composition_timedeltas: T.Iterable[int]) -> BoxDict: # compress offsets compressed: T.List[_CompressedSampleCompositionOffset] = [] - for offset in sample_composition_offsets: + for offset in sample_composition_timedeltas: if compressed and offset == compressed[-1].sample_offset: compressed[-1].sample_count += 1 else: @@ -196,8 +197,8 @@ def build_stbl_from_raw_samples( # so we can calculate the moov box size in advance _build_co64(raw_samples), ] - if any(s.composition_offset for s in raw_samples): - boxes.append(_build_ctts((s.composition_offset for s in raw_samples))) + if any(s.composition_timedelta for s in raw_samples): + boxes.append(_build_ctts((s.composition_timedelta for s in raw_samples))) if any(not s.is_sync for s in raw_samples): boxes.append(_build_stss((s.is_sync for s in raw_samples))) return boxes @@ -248,7 +249,7 @@ def _update_sbtl_sample_offsets(trak: BoxDict, sample_offset: int) -> int: offset=sample_offset, size=sample.size, timedelta=sample.timedelta, - composition_offset=sample.composition_offset, + composition_timedelta=sample.composition_timedelta, is_sync=sample.is_sync, ) ) diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index 6cfdfe121..6c1d80afd 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -237,7 +237,7 @@ def _sample_video_stream_by_distance( sorted_samples = list(video_track_parser.parse_samples()) # we need sort sampels by composition time (CT) not the decoding offset (DT) # CT is the oder of videos streaming to audiences, as well as the order ffmpeg sampling - sorted_samples.sort(key=lambda sample: sample.composition_time_offset) + sorted_samples.sort(key=lambda sample: sample.exact_composition_time) LOG.info("Found total %d video samples", len(sorted_samples)) # interpolate sample points between the GPS track range (with 1ms buffer) @@ -251,11 +251,11 @@ def _sample_video_stream_by_distance( ( frame_idx_0based, video_sample, - interpolator.interpolate(video_sample.composition_time_offset), + interpolator.interpolate(video_sample.exact_composition_time), ) for frame_idx_0based, video_sample in enumerate(sorted_samples) if _within_track_time_range_buffered( - points, video_sample.composition_time_offset + points, video_sample.exact_composition_time ) ] LOG.info("Found total %d interpolated video samples", len(interp_sample_points)) @@ -350,8 +350,8 @@ def _sample_single_video_by_distance( video_sample, interp = sample_point assert ( - interp.time == video_sample.composition_time_offset - ), f"interpolated time {interp.time} should match the video sample time {video_sample.composition_time_offset}" + interp.time == video_sample.exact_composition_time + ), f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" timestamp = start_time + datetime.timedelta(seconds=interp.time) exif_edit = ExifEdit(sample_paths[0]) diff --git a/tests/unit/test_mp4_sample_parser.py b/tests/unit/test_mp4_sample_parser.py index 003fae370..1b08bdc6e 100644 --- a/tests/unit/test_mp4_sample_parser.py +++ b/tests/unit/test_mp4_sample_parser.py @@ -49,6 +49,6 @@ def test_movie_box_parser(): } assert isinstance(video_track.tkhd(), dict) for sample, raw_sample in zip(samples, raw_samples): - assert sample.offset == raw_sample.offset - assert sample.is_sync == raw_sample.is_sync - assert sample.size == raw_sample.size + assert sample.raw_sample.offset == raw_sample.offset + assert sample.raw_sample.is_sync == raw_sample.is_sync + assert sample.raw_sample.size == raw_sample.size diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index 88b00cad2..aaeba8efc 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -62,7 +62,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), sample_parser.RawSample( @@ -70,7 +70,7 @@ def test_build_stbl_happy(): offset=2, size=9, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=False, ), ] @@ -82,7 +82,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), sample_parser.RawSample( @@ -90,7 +90,7 @@ def test_build_stbl_happy(): offset=2, size=2, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=False, ), # another chunk here due to a 1-byte break @@ -99,7 +99,7 @@ def test_build_stbl_happy(): offset=5, size=1, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), sample_parser.RawSample( @@ -107,7 +107,7 @@ def test_build_stbl_happy(): offset=6, size=9, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=False, ), ] @@ -119,7 +119,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=False, ), sample_parser.RawSample( @@ -127,7 +127,7 @@ def test_build_stbl_happy(): offset=2, size=2, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), # another chunk here @@ -136,7 +136,7 @@ def test_build_stbl_happy(): offset=4, size=1, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), # another chunk here @@ -145,7 +145,7 @@ def test_build_stbl_happy(): offset=5, size=9, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), ] @@ -157,7 +157,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), ] @@ -257,7 +257,7 @@ def test_parse_raw_samples_from_stbl(): offset=1, size=1, timedelta=20, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), sample_parser.RawSample( @@ -265,7 +265,7 @@ def test_parse_raw_samples_from_stbl(): offset=2, size=2, timedelta=30, - composition_offset=0, + composition_timedelta=0, is_sync=False, ), sample_parser.RawSample( @@ -273,7 +273,7 @@ def test_parse_raw_samples_from_stbl(): offset=5, size=3, timedelta=30, - composition_offset=0, + composition_timedelta=0, is_sync=True, ), sample_parser.RawSample( @@ -281,7 +281,7 @@ def test_parse_raw_samples_from_stbl(): offset=8, size=3, timedelta=50, - composition_offset=0, + composition_timedelta=0, is_sync=False, ), ] == samples From 89e4fd281fe89344b3a75b59f129230288685cba Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 4 Aug 2023 18:06:44 -0700 Subject: [PATCH 04/14] rename: import simple_mp4_parser as sparser --- mapillary_tools/geotag/blackvue_parser.py | 8 ++++---- mapillary_tools/geotag/camm_parser.py | 16 +++++++-------- .../geotag/geotag_videos_from_video.py | 8 ++++---- mapillary_tools/geotag/gpmf_parser.py | 8 ++++---- mapillary_tools/geotag/mp4_sample_parser.py | 12 +++++------ mapillary_tools/geotag/simple_mp4_builder.py | 6 +++--- tests/cli/simple_mp4_parser.py | 20 +++++++++---------- tests/unit/test_simple_mp4_builder.py | 4 ++-- tests/unit/test_simple_mp4_parser.py | 8 ++++---- 9 files changed, 45 insertions(+), 45 deletions(-) diff --git a/mapillary_tools/geotag/blackvue_parser.py b/mapillary_tools/geotag/blackvue_parser.py index a34d53278..812dc70a3 100644 --- a/mapillary_tools/geotag/blackvue_parser.py +++ b/mapillary_tools/geotag/blackvue_parser.py @@ -7,7 +7,7 @@ import pynmea2 from .. import geo -from . import simple_mp4_parser +from . import simple_mp4_parser as sparser LOG = logging.getLogger(__name__) @@ -55,8 +55,8 @@ def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: def extract_camera_model(fp: T.BinaryIO) -> str: try: - cprt_bytes = simple_mp4_parser.parse_mp4_data_first(fp, [b"free", b"cprt"]) - except simple_mp4_parser.ParsingError: + cprt_bytes = sparser.parse_mp4_data_first(fp, [b"free", b"cprt"]) + except sparser.ParsingError: return "" if cprt_bytes is None: @@ -91,7 +91,7 @@ def extract_camera_model(fp: T.BinaryIO) -> str: def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: - gps_data = simple_mp4_parser.parse_mp4_data_first(fp, [b"free", b"gps "]) + gps_data = sparser.parse_mp4_data_first(fp, [b"free", b"gps "]) if gps_data is None: return None diff --git a/mapillary_tools/geotag/camm_parser.py b/mapillary_tools/geotag/camm_parser.py index d653452f0..c529839ae 100644 --- a/mapillary_tools/geotag/camm_parser.py +++ b/mapillary_tools/geotag/camm_parser.py @@ -13,7 +13,7 @@ construct_mp4_parser as cparser, geo, mp4_sample_parser as sample_parser, - simple_mp4_parser as parser, + simple_mp4_parser as sparser, ) @@ -170,7 +170,7 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: media_timescale = None elst_entries = None - for h, s in parser.parse_path(fp, [b"moov", [b"mvhd", b"trak"]]): + for h, s in sparser.parse_path(fp, [b"moov", [b"mvhd", b"trak"]]): if h.type == b"trak": trak_start_offset = s.tell() @@ -191,14 +191,14 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: points = [p for p in points_with_nones if p is not None] if points: s.seek(trak_start_offset) - elst_data = parser.parse_box_data_first( + elst_data = sparser.parse_box_data_first( s, [b"edts", b"elst"], maxsize=h.maxsize ) if elst_data is not None: elst_entries = cparser.EditBox.parse(elst_data)["entries"] s.seek(trak_start_offset) - mdhd_data = parser.parse_box_data_firstx( + mdhd_data = sparser.parse_box_data_firstx( s, [b"mdia", b"mdhd"], maxsize=h.maxsize ) mdhd = cparser.MediaHeaderBox.parse(mdhd_data) @@ -238,7 +238,7 @@ def parse_gpx(path: pathlib.Path) -> T.List[geo.Point]: ) -def _decode_quietly(data: bytes, h: parser.Header) -> str: +def _decode_quietly(data: bytes, h: sparser.Header) -> str: try: return data.decode("utf-8") except UnicodeDecodeError: @@ -246,7 +246,7 @@ def _decode_quietly(data: bytes, h: parser.Header) -> str: return "" -def _parse_quietly(data: bytes, h: parser.Header) -> bytes: +def _parse_quietly(data: bytes, h: sparser.Header) -> bytes: try: parsed = MakeOrModel.parse(data) except C.ConstructError: @@ -256,7 +256,7 @@ def _parse_quietly(data: bytes, h: parser.Header) -> bytes: def extract_camera_make_and_model(fp: T.BinaryIO) -> T.Tuple[str, str]: - header_and_stream = parser.parse_path( + header_and_stream = sparser.parse_path( fp, [ b"moov", @@ -296,7 +296,7 @@ def extract_camera_make_and_model(fp: T.BinaryIO) -> T.Tuple[str, str]: # quit when both found if make and model: break - except parser.ParsingError: + except sparser.ParsingError: pass if make: diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 77be8c6f4..42846ffc0 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -12,7 +12,7 @@ camm_parser, gpmf_gps_filter, gpmf_parser, - simple_mp4_parser as parser, + simple_mp4_parser as sparser, utils as video_utils, ) from .geotag_from_generic import GeotagVideosFromGeneric @@ -77,7 +77,7 @@ def _extract_video_metadata( with video_path.open("rb") as fp: try: points = camm_parser.extract_points(fp) - except parser.ParsingError: + except sparser.ParsingError: points = None if points is not None: @@ -100,7 +100,7 @@ def _extract_video_metadata( with video_path.open("rb") as fp: try: points_with_fix = gpmf_parser.extract_points(fp) - except parser.ParsingError: + except sparser.ParsingError: points_with_fix = None if points_with_fix is not None: @@ -123,7 +123,7 @@ def _extract_video_metadata( with video_path.open("rb") as fp: try: points = blackvue_parser.extract_points(fp) - except parser.ParsingError: + except sparser.ParsingError: points = None if points is not None: diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index f8a75056e..f4f984bea 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -11,7 +11,7 @@ import construct as C from .. import geo -from . import mp4_sample_parser as sample_parser, simple_mp4_parser as parser +from . import mp4_sample_parser as sample_parser, simple_mp4_parser as sparser """ Parsing GPS from GPMF data format stored in GoPros. See the GPMF spec: https://github.com/gopro/gpmf-parser @@ -309,7 +309,7 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.PointWithFix]]: otherwise None """ points = None - for h, s in parser.parse_path(fp, [b"moov", b"trak"]): + for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): trak_start_offset = s.tell() descriptions = _extract_gpmd_descriptions_from_trak(s, h.maxsize) if descriptions: @@ -346,7 +346,7 @@ def _extract_gpmd_samples_from_trak( def extract_all_device_names(fp: T.BinaryIO) -> T.Dict[int, bytes]: - for h, s in parser.parse_path(fp, [b"moov", b"trak"]): + for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): gpmd_samples = _extract_gpmd_samples_from_trak(s, h.maxsize) device_names = _extract_dvnm_from_samples(fp, gpmd_samples) if device_names: @@ -394,7 +394,7 @@ def parse_gpx(path: pathlib.Path) -> T.List[geo.PointWithFix]: def iterate_gpmd_sample_data(fp: T.BinaryIO) -> T.Generator[T.Dict, None, None]: - for h, s in parser.parse_path(fp, [b"moov", b"trak"]): + for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): gpmd_samples = _extract_gpmd_samples_from_trak(s, h.maxsize) for sample in gpmd_samples: fp.seek(sample.raw_sample.offset, io.SEEK_SET) diff --git a/mapillary_tools/geotag/mp4_sample_parser.py b/mapillary_tools/geotag/mp4_sample_parser.py index fa8d52422..c01d06376 100644 --- a/mapillary_tools/geotag/mp4_sample_parser.py +++ b/mapillary_tools/geotag/mp4_sample_parser.py @@ -3,7 +3,7 @@ import typing as T from pathlib import Path -from . import construct_mp4_parser as cparser, simple_mp4_parser as parser +from . import construct_mp4_parser as cparser, simple_mp4_parser as sparser class RawSample(T.NamedTuple): @@ -162,7 +162,7 @@ def parse_raw_samples_from_stbl( composition_timedeltas: T.Optional[T.List[int]] = None syncs: T.Optional[T.Set[int]] = None - for h, s in parser.parse_boxes(stbl, maxsize=maxsize, extend_eof=False): + for h, s in sparser.parse_boxes(stbl, maxsize=maxsize, extend_eof=False): if h.type == b"stsd": box = cparser.SampleDescriptionBox.parse(s.read(h.maxsize)) descriptions = list(box.entries) @@ -273,7 +273,7 @@ def parse_raw_samples_from_stbl_bytes( def parse_descriptions_from_trak(trak: T.BinaryIO, maxsize: int = -1) -> T.List[T.Dict]: - data = parser.parse_box_data_first( + data = sparser.parse_box_data_first( trak, [b"mdia", b"minf", b"stbl", b"stsd"], maxsize=maxsize ) if data is None: @@ -289,11 +289,11 @@ def parse_samples_from_trak( trak_start_offset = trak.tell() trak.seek(trak_start_offset, io.SEEK_SET) - mdhd_box = parser.parse_box_data_firstx(trak, [b"mdia", b"mdhd"], maxsize=maxsize) + mdhd_box = sparser.parse_box_data_firstx(trak, [b"mdia", b"mdhd"], maxsize=maxsize) mdhd = T.cast(T.Dict, cparser.MediaHeaderBox.parse(mdhd_box)) trak.seek(trak_start_offset, io.SEEK_SET) - h, s = parser.parse_box_path_firstx( + h, s = sparser.parse_box_path_firstx( trak, [b"mdia", b"minf", b"stbl"], maxsize=maxsize ) descriptions, raw_samples = parse_raw_samples_from_stbl(s, maxsize=h.maxsize) @@ -357,7 +357,7 @@ def __init__(self, moov: bytes): @classmethod def parse_file(cls, video_path: Path) -> "MovieBoxParser": with video_path.open("rb") as fp: - moov = parser.parse_box_data_firstx(fp, [b"moov"]) + moov = sparser.parse_box_data_firstx(fp, [b"moov"]) return MovieBoxParser(moov) def mvhd(self): diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index 5a5ece821..e4fce07ce 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -6,7 +6,7 @@ construct_mp4_parser as cparser, io_utils, mp4_sample_parser as sample_parser, - simple_mp4_parser as parser, + simple_mp4_parser as sparser, ) from .construct_mp4_parser import BoxDict from .mp4_sample_parser import RawSample @@ -335,11 +335,11 @@ def transform_mp4( ) -> io_utils.ChainedIO: # extract ftyp src_fp.seek(0) - ftyp_data = parser.parse_mp4_data_firstx(src_fp, [b"ftyp"]) + ftyp_data = sparser.parse_mp4_data_firstx(src_fp, [b"ftyp"]) # extract moov src_fp.seek(0) - moov_data = parser.parse_mp4_data_firstx(src_fp, [b"moov"]) + moov_data = sparser.parse_mp4_data_firstx(src_fp, [b"moov"]) moov_children = _MOOVChildrenParserConstruct.parse_boxlist(moov_data) # filter tracks in moov diff --git a/tests/cli/simple_mp4_parser.py b/tests/cli/simple_mp4_parser.py index 3fd6ae524..c15d844ca 100644 --- a/tests/cli/simple_mp4_parser.py +++ b/tests/cli/simple_mp4_parser.py @@ -9,7 +9,7 @@ from mapillary_tools.geotag import ( construct_mp4_parser as cparser, mp4_sample_parser as sample_parser, - simple_mp4_parser as parser, + simple_mp4_parser as sparser, ) LOG = logging.getLogger(__name__) @@ -37,7 +37,7 @@ def _validate_samples( samples: T.List[sample_parser.RawSample] = [] with open(path, "rb") as fp: - for h, s in parser.parse_path( + for h, s in sparser.parse_path( fp, [b"moov", b"trak", b"mdia", b"minf", b"stbl"] ): ( @@ -67,7 +67,7 @@ def _validate_samples( def _parse_structs(fp: T.BinaryIO): - for h, d, s in parser.parse_boxes_recursive(fp, box_list_types=box_list_types): + for h, d, s in sparser.parse_boxes_recursive(fp, box_list_types=box_list_types): margin = "\t" * d if h.size32 == 0: header = f"{str(h.type)} {h.box_size} (open-ended):" @@ -86,7 +86,7 @@ def _parse_structs(fp: T.BinaryIO): def _dump_box_data_at(fp: T.BinaryIO, box_type_path: T.List[bytes]): - for h, s in parser.parse_path(fp, box_type_path): + for h, s in sparser.parse_path(fp, box_type_path): max_chunk_size = 1024 read = 0 while read < h.maxsize or h.maxsize == -1: @@ -103,9 +103,9 @@ def _dump_box_data_at(fp: T.BinaryIO, box_type_path: T.List[bytes]): def _parse_samples(fp: T.BinaryIO, filters: T.Optional[T.Container[bytes]] = None): - for h, s in parser.parse_path(fp, [b"moov", b"trak"]): + for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): offset = s.tell() - for h1, s1 in parser.parse_path(s, [b"mdia", b"mdhd"], maxsize=h.maxsize): + for h1, s1 in sparser.parse_path(s, [b"mdia", b"mdhd"], maxsize=h.maxsize): box = cparser.MediaHeaderBox.parse(s1.read(h.maxsize)) LOG.info(box) LOG.info(sample_parser.to_datetime(box.creation_time)) @@ -117,7 +117,7 @@ def _parse_samples(fp: T.BinaryIO, filters: T.Optional[T.Container[bytes]] = Non def _dump_samples(fp: T.BinaryIO, filters: T.Optional[T.Container[bytes]] = None): - for h, s in parser.parse_path(fp, [b"moov", b"trak"]): + for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): for sample in sample_parser.parse_samples_from_trak(s, maxsize=h.maxsize): if filters is None or sample.description["format"] in filters: fp.seek(sample.offset, io.SEEK_SET) @@ -203,13 +203,13 @@ def _process_path(parsed_args, path: pathlib.Path): if box_path is None: _parse_structs(fp) else: - data = parser.parse_mp4_data_firstx(fp, box_path) + data = sparser.parse_mp4_data_firstx(fp, box_path) _parse_structs(io.BytesIO(data)) elif parsed_args.full: if box_path is None: boxes = cparser.MP4ParserConstruct.BoxList.parse_stream(fp) else: - data = parser.parse_mp4_data_firstx(fp, box_path) + data = sparser.parse_mp4_data_firstx(fp, box_path) boxes = cparser.MP4ParserConstruct.BoxList.parse_stream( io.BytesIO(data) ) @@ -222,7 +222,7 @@ def _process_path(parsed_args, path: pathlib.Path): ) ) else: - data = parser.parse_mp4_data_firstx(fp, box_path) + data = sparser.parse_mp4_data_firstx(fp, box_path) boxes = ( cparser.MP4WithoutSTBLParserConstruct.BoxList.parse_stream( io.BytesIO(data) diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index aaeba8efc..e6edabe47 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -5,7 +5,7 @@ construct_mp4_parser as cparser, mp4_sample_parser as sample_parser, simple_mp4_builder as builder, - simple_mp4_parser as parser, + simple_mp4_parser as sparser, ) @@ -44,7 +44,7 @@ def _build_and_parse_stbl( d = cparser.Box32ConstructBuilder({b"stbl": cparser.CMAP[b"stbl"]}).Box.build( {"type": b"stbl", "data": s} ) - ss = parser.parse_box_data_firstx(io.BytesIO(d), [b"stbl"]) + ss = sparser.parse_box_data_firstx(io.BytesIO(d), [b"stbl"]) assert d[8:] == ss _, parsed_samples = sample_parser.parse_raw_samples_from_stbl(io.BytesIO(ss)) assert expected_samples == list(parsed_samples) diff --git a/tests/unit/test_simple_mp4_parser.py b/tests/unit/test_simple_mp4_parser.py index 19701398c..eaeb7142b 100644 --- a/tests/unit/test_simple_mp4_parser.py +++ b/tests/unit/test_simple_mp4_parser.py @@ -3,7 +3,7 @@ from mapillary_tools.geotag import ( construct_mp4_parser as cparser, - simple_mp4_parser as parser, + simple_mp4_parser as sparser, ) @@ -26,7 +26,7 @@ def _parse(data: bytes): } consumed_size = 0 ret = [] - for h, _d, s in parser.parse_boxes_recursive( + for h, _d, s in sparser.parse_boxes_recursive( io.BytesIO(data), box_list_types=box_list_types ): box_data = s.read(h.maxsize) @@ -42,7 +42,7 @@ def _parse(data: bytes): def _assert_box_type( data: bytes, - parsed: typing.List[typing.Tuple[parser.Header, bytes]], + parsed: typing.List[typing.Tuple[sparser.Header, bytes]], box_type: bytes, ): assert 1 == len(parsed) @@ -55,7 +55,7 @@ def _assert_box_type( def test_parse_box_header(): s = io.BytesIO(b"hello") - header = parser.parse_box_header(s, maxsize=0) + header = sparser.parse_box_header(s, maxsize=0) assert header.header_size == 0 assert header.box_size == 0 assert header.type == b"" From 74fd1d110d80064b86560b2e66ad369ad8c001da Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 10 Aug 2023 18:25:49 -0400 Subject: [PATCH 05/14] refactor mp4 sample parser --- mapillary_tools/geotag/camm_parser.py | 2 +- mapillary_tools/geotag/gpmf_parser.py | 2 +- mapillary_tools/geotag/mp4_sample_parser.py | 170 ++++++------------- mapillary_tools/geotag/simple_mp4_builder.py | 4 +- tests/cli/simple_mp4_parser.py | 12 +- tests/unit/test_simple_mp4_builder.py | 156 +++++++++-------- 6 files changed, 145 insertions(+), 201 deletions(-) diff --git a/mapillary_tools/geotag/camm_parser.py b/mapillary_tools/geotag/camm_parser.py index c529839ae..35553364e 100644 --- a/mapillary_tools/geotag/camm_parser.py +++ b/mapillary_tools/geotag/camm_parser.py @@ -152,7 +152,7 @@ def _extract_camm_samples( s: T.BinaryIO, maxsize: int = -1, ) -> T.Generator[sample_parser.Sample, None, None]: - samples = sample_parser.parse_samples_from_trak(s, maxsize=maxsize) + samples = sample_parser.parse_samples_from_trak_DEPRECATED(s, maxsize=maxsize) camm_samples = ( sample for sample in samples if sample.description["format"] == b"camm" ) diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index f4f984bea..3deb7fe00 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -339,7 +339,7 @@ def _extract_gpmd_samples_from_trak( gpmd_descriptions = _extract_gpmd_descriptions_from_trak(s, maxsize=maxsize) if gpmd_descriptions: s.seek(trak_start_offset, io.SEEK_SET) - samples = sample_parser.parse_samples_from_trak(s, maxsize=maxsize) + samples = sample_parser.parse_samples_from_trak_DEPRECATED(s, maxsize=maxsize) for sample in samples: if sample.description["format"] == b"gpmd": yield sample diff --git a/mapillary_tools/geotag/mp4_sample_parser.py b/mapillary_tools/geotag/mp4_sample_parser.py index c01d06376..79dcc7d86 100644 --- a/mapillary_tools/geotag/mp4_sample_parser.py +++ b/mapillary_tools/geotag/mp4_sample_parser.py @@ -1,5 +1,4 @@ import datetime -import io import typing as T from pathlib import Path @@ -146,77 +145,12 @@ def _extract_samples( acc_delta += raw_sample.timedelta -def parse_raw_samples_from_stbl( - stbl: T.BinaryIO, - maxsize: int = -1, -) -> T.Tuple[T.List[T.Dict], T.Generator[RawSample, None, None]]: - """ - DEPRECATED: use parse_raw_samples_from_stbl_bytes instead - """ - - descriptions = [] - sizes = [] - chunk_offsets = [] - chunk_entries = [] - timedeltas: T.List[int] = [] - composition_timedeltas: T.Optional[T.List[int]] = None - syncs: T.Optional[T.Set[int]] = None - - for h, s in sparser.parse_boxes(stbl, maxsize=maxsize, extend_eof=False): - if h.type == b"stsd": - box = cparser.SampleDescriptionBox.parse(s.read(h.maxsize)) - descriptions = list(box.entries) - elif h.type == b"stsz": - box = cparser.SampleSizeBox.parse(s.read(h.maxsize)) - if box.sample_size == 0: - sizes = list(box.entries) - else: - sizes = [box.sample_size for _ in range(box.sample_count)] - elif h.type == b"stco": - box = cparser.ChunkOffsetBox.parse(s.read(h.maxsize)) - chunk_offsets = list(box.entries) - elif h.type == b"co64": - box = cparser.ChunkLargeOffsetBox.parse(s.read(h.maxsize)) - chunk_offsets = list(box.entries) - elif h.type == b"stsc": - box = cparser.SampleToChunkBox.parse(s.read(h.maxsize)) - chunk_entries = list(box.entries) - elif h.type == b"stts": - timedeltas = [] - box = cparser.TimeToSampleBox.parse(s.read(h.maxsize)) - for entry in box.entries: - for _ in range(entry.sample_count): - timedeltas.append(entry.sample_delta) - elif h.type == b"ctts": - composition_timedeltas = [] - box = cparser.CompositionTimeToSampleBox.parse(s.read(h.maxsize)) - for entry in box.entries: - for _ in range(entry.sample_count): - composition_timedeltas.append(entry.sample_offset) - elif h.type == b"stss": - box = cparser.SyncSampleBox.parse(s.read(h.maxsize)) - syncs = set(box.entries) - - # some stbl have less timedeltas than the sample count i.e. len(sizes), - # in this case append 0's to timedeltas - while len(timedeltas) < len(sizes): - timedeltas.append(0) - if composition_timedeltas is not None: - while len(composition_timedeltas) < len(sizes): - composition_timedeltas.append(0) - - raw_samples = _extract_raw_samples( - sizes, chunk_entries, chunk_offsets, timedeltas, composition_timedeltas, syncs - ) - return descriptions, raw_samples - - STBLBoxlistConstruct = cparser.Box64ConstructBuilder( T.cast(cparser.SwitchMapType, cparser.CMAP[b"stbl"]) ).BoxList -def parse_raw_samples_from_stbl_bytes( +def parse_raw_samples_from_stbl_data( stbl: bytes, ) -> T.Tuple[T.List[T.Dict], T.Generator[RawSample, None, None]]: descriptions = [] @@ -227,9 +161,11 @@ def parse_raw_samples_from_stbl_bytes( composition_timedeltas: T.Optional[T.List[int]] = None syncs: T.Optional[T.Set[int]] = None - stbl_boxes = T.cast(T.Sequence[cparser.BoxDict], STBLBoxlistConstruct.parse(stbl)) + stbl_children = T.cast( + T.Sequence[cparser.BoxDict], STBLBoxlistConstruct.parse(stbl) + ) - for box in stbl_boxes: + for box in stbl_children: data: T.Dict = T.cast(T.Dict, box["data"]) if box["type"] == b"stsd": @@ -272,86 +208,70 @@ def parse_raw_samples_from_stbl_bytes( return descriptions, raw_samples -def parse_descriptions_from_trak(trak: T.BinaryIO, maxsize: int = -1) -> T.List[T.Dict]: - data = sparser.parse_box_data_first( - trak, [b"mdia", b"minf", b"stbl", b"stsd"], maxsize=maxsize - ) - if data is None: - return [] - box = cparser.SampleDescriptionBox.parse(data) - return list(box.entries) - - -def parse_samples_from_trak( - trak: T.BinaryIO, - maxsize: int = -1, -) -> T.Generator[Sample, None, None]: - trak_start_offset = trak.tell() - - trak.seek(trak_start_offset, io.SEEK_SET) - mdhd_box = sparser.parse_box_data_firstx(trak, [b"mdia", b"mdhd"], maxsize=maxsize) - mdhd = T.cast(T.Dict, cparser.MediaHeaderBox.parse(mdhd_box)) - - trak.seek(trak_start_offset, io.SEEK_SET) - h, s = sparser.parse_box_path_firstx( - trak, [b"mdia", b"minf", b"stbl"], maxsize=maxsize - ) - descriptions, raw_samples = parse_raw_samples_from_stbl(s, maxsize=h.maxsize) - - yield from _extract_samples(raw_samples, descriptions, mdhd["timescale"]) - - -STSDBoxListConstruct = cparser.Box64ConstructBuilder( +_STSDBoxListConstruct = cparser.Box64ConstructBuilder( # pyre-ignore[6]: pyre does not support recursive type SwitchMapType {b"stsd": cparser.CMAP[b"stsd"]} ).BoxList class TrackBoxParser: - trak_boxes: T.Sequence[cparser.BoxDict] + trak_children: T.Sequence[cparser.BoxDict] stbl_data: bytes - def __init__(self, trak_boxes: T.Sequence[cparser.BoxDict]): - self.trak_boxes = trak_boxes - stbl = cparser.find_box_at_pathx(self.trak_boxes, [b"mdia", b"minf", b"stbl"]) + def __init__(self, trak_children: T.Sequence[cparser.BoxDict]): + self.trak_children = trak_children + stbl = cparser.find_box_at_pathx( + self.trak_children, [b"mdia", b"minf", b"stbl"] + ) self.stbl_data = T.cast(bytes, stbl["data"]) def tkhd(self) -> T.Dict: return T.cast( - T.Dict, cparser.find_box_at_pathx(self.trak_boxes, [b"tkhd"])["data"] + T.Dict, cparser.find_box_at_pathx(self.trak_children, [b"tkhd"])["data"] ) def is_video_track(self) -> bool: - hdlr = cparser.find_box_at_pathx(self.trak_boxes, [b"mdia", b"hdlr"]) + hdlr = cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"hdlr"]) return T.cast(T.Dict[str, T.Any], hdlr["data"])["handler_type"] == b"vide" - def parse_sample_description(self) -> T.Dict: - boxes = STSDBoxListConstruct.parse(self.stbl_data) + def parse_sample_descriptions(self) -> T.List[T.Dict]: + # TODO: return [] if parsing fail + boxes = _STSDBoxListConstruct.parse(self.stbl_data) stsd = cparser.find_box_at_pathx( T.cast(T.Sequence[cparser.BoxDict], boxes), [b"stsd"] ) - return T.cast(T.Dict, stsd["data"]) + return T.cast(T.List[T.Dict], T.cast(T.Dict, stsd["data"])["entries"]) + + def extract_elst_boxdata(self) -> T.Optional[T.Dict]: + box = cparser.find_box_at_path(self.trak_children, [b"edts", b"elst"]) + if box is None: + return None + return T.cast(T.Dict, box["data"]) + + def extract_mdhd_boxdata(self) -> T.Dict: + box = cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"mdhd"]) + return T.cast(T.Dict, box["data"]) def parse_raw_samples(self) -> T.Generator[RawSample, None, None]: - _, raw_samples = parse_raw_samples_from_stbl_bytes(self.stbl_data) + _, raw_samples = parse_raw_samples_from_stbl_data(self.stbl_data) yield from raw_samples def parse_samples(self) -> T.Generator[Sample, None, None]: - descriptions, raw_samples = parse_raw_samples_from_stbl_bytes(self.stbl_data) + descriptions, raw_samples = parse_raw_samples_from_stbl_data(self.stbl_data) mdhd = T.cast( T.Dict, - cparser.find_box_at_pathx(self.trak_boxes, [b"mdia", b"mdhd"])["data"], + cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"mdhd"])["data"], ) yield from _extract_samples(raw_samples, descriptions, mdhd["timescale"]) class MovieBoxParser: - moov_boxes: T.Sequence[cparser.BoxDict] + moov_children: T.Sequence[cparser.BoxDict] - def __init__(self, moov: bytes): - self.moov_boxes = T.cast( + def __init__(self, moov_data: bytes): + self.moov_children = T.cast( T.Sequence[cparser.BoxDict], - cparser.MOOVWithoutSTBLBuilderConstruct.BoxList.parse(moov), + cparser.MOOVWithoutSTBLBuilderConstruct.BoxList.parse(moov_data), ) @classmethod @@ -360,12 +280,17 @@ def parse_file(cls, video_path: Path) -> "MovieBoxParser": moov = sparser.parse_box_data_firstx(fp, [b"moov"]) return MovieBoxParser(moov) - def mvhd(self): - mvhd = cparser.find_box_at_pathx(self.moov_boxes, [b"mvhd"]) - return mvhd["data"] + @classmethod + def parse_stream(cls, stream: T.BinaryIO) -> "MovieBoxParser": + moov = sparser.parse_box_data_firstx(stream, [b"moov"]) + return MovieBoxParser(moov) + + def mvhd(self) -> T.Dict: + mvhd = cparser.find_box_at_pathx(self.moov_children, [b"mvhd"]) + return T.cast(T.Dict, mvhd["data"]) def parse_tracks(self) -> T.Generator[TrackBoxParser, None, None]: - for box in self.moov_boxes: + for box in self.moov_children: if box["type"] == b"trak": yield TrackBoxParser(T.cast(T.Sequence[cparser.BoxDict], box["data"])) @@ -374,16 +299,17 @@ def parse_track_at(self, stream_idx: int) -> TrackBoxParser: stream_idx should be the stream_index specifier. See http://ffmpeg.org/ffmpeg.html#Stream-specifiers-1 > Stream numbering is based on the order of the streams as detected by libavformat """ - trak_boxes = [box for box in self.moov_boxes if box["type"] == b"trak"] + trak_boxes = [box for box in self.moov_children if box["type"] == b"trak"] if not (0 <= stream_idx < len(trak_boxes)): raise IndexError( "unable to read stream at %d from the track list (length %d)", stream_idx, len(trak_boxes), ) - return TrackBoxParser( - T.cast(T.Sequence[cparser.BoxDict], trak_boxes[stream_idx]["data"]) + trak_children = T.cast( + T.Sequence[cparser.BoxDict], trak_boxes[stream_idx]["data"] ) + return TrackBoxParser(trak_children) _DT_1904 = datetime.datetime.utcfromtimestamp(0).replace(year=1904) diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index e4fce07ce..a3f097196 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -255,7 +255,7 @@ def _update_sbtl_sample_offsets(trak: BoxDict, sample_offset: int) -> int: ) sample_offset += sample.size stbl_box = cparser.find_box_at_pathx(trak, [b"trak", b"mdia", b"minf", b"stbl"]) - descriptions, _ = sample_parser.parse_raw_samples_from_stbl( + descriptions, _ = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( io.BytesIO(T.cast(bytes, stbl_box["data"])) ) stbl_children_boxes = build_stbl_from_raw_samples( @@ -274,7 +274,7 @@ def iterate_samples( stbl_box = cparser.find_box_at_pathx( box, [b"trak", b"mdia", b"minf", b"stbl"] ) - _, raw_samples_iter = sample_parser.parse_raw_samples_from_stbl( + _, raw_samples_iter = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( io.BytesIO(T.cast(bytes, stbl_box["data"])) ) yield from raw_samples_iter diff --git a/tests/cli/simple_mp4_parser.py b/tests/cli/simple_mp4_parser.py index c15d844ca..74f24056d 100644 --- a/tests/cli/simple_mp4_parser.py +++ b/tests/cli/simple_mp4_parser.py @@ -43,7 +43,9 @@ def _validate_samples( ( descriptions, raw_samples, - ) = sample_parser.parse_raw_samples_from_stbl(s, maxsize=h.maxsize) + ) = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( + s, maxsize=h.maxsize + ) samples.extend( sample for sample in raw_samples @@ -111,14 +113,18 @@ def _parse_samples(fp: T.BinaryIO, filters: T.Optional[T.Container[bytes]] = Non LOG.info(sample_parser.to_datetime(box.creation_time)) LOG.info(box.duration / box.timescale) s.seek(offset, io.SEEK_SET) - for sample in sample_parser.parse_samples_from_trak(s, maxsize=h.maxsize): + for sample in sample_parser.parse_samples_from_trak_DEPRECATED( + s, maxsize=h.maxsize + ): if filters is None or sample.description["format"] in filters: print(sample) def _dump_samples(fp: T.BinaryIO, filters: T.Optional[T.Container[bytes]] = None): for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): - for sample in sample_parser.parse_samples_from_trak(s, maxsize=h.maxsize): + for sample in sample_parser.parse_samples_from_trak_DEPRECATED( + s, maxsize=h.maxsize + ): if filters is None or sample.description["format"] in filters: fp.seek(sample.offset, io.SEEK_SET) data = fp.read(sample.size) diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index e6edabe47..27cbbd84c 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -46,7 +46,11 @@ def _build_and_parse_stbl( ) ss = sparser.parse_box_data_firstx(io.BytesIO(d), [b"stbl"]) assert d[8:] == ss - _, parsed_samples = sample_parser.parse_raw_samples_from_stbl(io.BytesIO(ss)) + _, parsed_samples = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( + io.BytesIO(ss) + ) + assert expected_samples == list(parsed_samples) + _, parsed_samples = sample_parser.parse_raw_samples_from_stbl_data(ss) assert expected_samples == list(parsed_samples) @@ -247,80 +251,88 @@ def test_parse_raw_samples_from_stbl(): }, ] ) - descs, sample_iter = sample_parser.parse_raw_samples_from_stbl( + + def _verify_samples(descs, samples): + assert [ + sample_parser.RawSample( + description_idx=1, + offset=1, + size=1, + timedelta=20, + composition_timedelta=0, + is_sync=True, + ), + sample_parser.RawSample( + description_idx=1, + offset=2, + size=2, + timedelta=30, + composition_timedelta=0, + is_sync=False, + ), + sample_parser.RawSample( + description_idx=1, + offset=5, + size=3, + timedelta=30, + composition_timedelta=0, + is_sync=True, + ), + sample_parser.RawSample( + description_idx=1, + offset=8, + size=3, + timedelta=50, + composition_timedelta=0, + is_sync=False, + ), + ] == samples + d = builder.build_stbl_from_raw_samples(descs, samples) + assert d[1:] == [ + { + "data": { + "entries": [ + {"sample_count": 1, "sample_delta": 20}, + {"sample_count": 2, "sample_delta": 30}, + {"sample_count": 1, "sample_delta": 50}, + ] + }, + "type": b"stts", + }, + { + "data": { + "entries": [ + { + "first_chunk": 1, + "sample_description_index": 1, + "samples_per_chunk": 2, + }, + { + "first_chunk": 2, + "sample_description_index": 1, + "samples_per_chunk": 2, + }, + ] + }, + "type": b"stsc", + }, + { + "data": {"entries": [1, 2, 3, 3], "sample_count": 4, "sample_size": 0}, + "type": b"stsz", + }, + {"data": {"entries": [1, 5]}, "type": b"co64"}, + {"data": {"entries": [1, 3]}, "type": b"stss"}, + ] + + descs, sample_iter = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( io.BytesIO(stbl_bytes) ) samples = list(sample_iter) - assert [ - sample_parser.RawSample( - description_idx=1, - offset=1, - size=1, - timedelta=20, - composition_timedelta=0, - is_sync=True, - ), - sample_parser.RawSample( - description_idx=1, - offset=2, - size=2, - timedelta=30, - composition_timedelta=0, - is_sync=False, - ), - sample_parser.RawSample( - description_idx=1, - offset=5, - size=3, - timedelta=30, - composition_timedelta=0, - is_sync=True, - ), - sample_parser.RawSample( - description_idx=1, - offset=8, - size=3, - timedelta=50, - composition_timedelta=0, - is_sync=False, - ), - ] == samples - d = builder.build_stbl_from_raw_samples(descs, samples) - assert d[1:] == [ - { - "data": { - "entries": [ - {"sample_count": 1, "sample_delta": 20}, - {"sample_count": 2, "sample_delta": 30}, - {"sample_count": 1, "sample_delta": 50}, - ] - }, - "type": b"stts", - }, - { - "data": { - "entries": [ - { - "first_chunk": 1, - "sample_description_index": 1, - "samples_per_chunk": 2, - }, - { - "first_chunk": 2, - "sample_description_index": 1, - "samples_per_chunk": 2, - }, - ] - }, - "type": b"stsc", - }, - { - "data": {"entries": [1, 2, 3, 3], "sample_count": 4, "sample_size": 0}, - "type": b"stsz", - }, - {"data": {"entries": [1, 5]}, "type": b"co64"}, - {"data": {"entries": [1, 3]}, "type": b"stss"}, - ] + _verify_samples(descs, samples) + + descs, sample_iter = sample_parser.parse_raw_samples_from_stbl_data(stbl_bytes) + samples = list(sample_iter) + _verify_samples(descs, samples) def test_box_header_0_building(): From d42e58eed6ea6fedd7cfa7e38c9a502f20387038 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 10 Aug 2023 18:26:47 -0400 Subject: [PATCH 06/14] refactor camm parser --- mapillary_tools/geotag/camm_parser.py | 92 +++++++------------ .../geotag/construct_mp4_parser.py | 30 ++++-- 2 files changed, 51 insertions(+), 71 deletions(-) diff --git a/mapillary_tools/geotag/camm_parser.py b/mapillary_tools/geotag/camm_parser.py index 35553364e..777d59f7f 100644 --- a/mapillary_tools/geotag/camm_parser.py +++ b/mapillary_tools/geotag/camm_parser.py @@ -10,7 +10,6 @@ import construct as C from . import ( - construct_mp4_parser as cparser, geo, mp4_sample_parser as sample_parser, simple_mp4_parser as sparser, @@ -148,15 +147,8 @@ def elst_entry_to_seconds( return (media_time, duration) -def _extract_camm_samples( - s: T.BinaryIO, - maxsize: int = -1, -) -> T.Generator[sample_parser.Sample, None, None]: - samples = sample_parser.parse_samples_from_trak_DEPRECATED(s, maxsize=maxsize) - camm_samples = ( - sample for sample in samples if sample.description["format"] == b"camm" - ) - yield from camm_samples +def _is_camm_description(description: T.Dict) -> bool: + return description["format"] == b"camm" def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: @@ -166,59 +158,37 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: """ points = None - movie_timescale = None - media_timescale = None - elst_entries = None - - for h, s in sparser.parse_path(fp, [b"moov", [b"mvhd", b"trak"]]): - if h.type == b"trak": - trak_start_offset = s.tell() - descriptions = sample_parser.parse_descriptions_from_trak( - s, maxsize=h.maxsize + moov = sample_parser.MovieBoxParser.parse_stream(fp) + for track in moov.parse_tracks(): + descriptions = track.parse_sample_descriptions() + if any(_is_camm_description(d) for d in descriptions): + maybe_points = ( + _parse_point_from_sample(fp, sample) + for sample in track.parse_samples() + if _is_camm_description(sample.description) ) - camm_descriptions = [d for d in descriptions if d["format"] == b"camm"] - if camm_descriptions: - s.seek(trak_start_offset, io.SEEK_SET) - camm_samples = _extract_camm_samples(s, h.maxsize) - - points_with_nones = ( - _parse_point_from_sample(fp, sample) - for sample in camm_samples - if sample.description["format"] == b"camm" - ) - - points = [p for p in points_with_nones if p is not None] - if points: - s.seek(trak_start_offset) - elst_data = sparser.parse_box_data_first( - s, [b"edts", b"elst"], maxsize=h.maxsize - ) - if elst_data is not None: - elst_entries = cparser.EditBox.parse(elst_data)["entries"] - - s.seek(trak_start_offset) - mdhd_data = sparser.parse_box_data_firstx( - s, [b"mdia", b"mdhd"], maxsize=h.maxsize - ) - mdhd = cparser.MediaHeaderBox.parse(mdhd_data) - media_timescale = mdhd["timescale"] - else: - assert h.type == b"mvhd" - if not movie_timescale: - mvhd = cparser.MovieHeaderBox.parse(s.read(h.maxsize)) - movie_timescale = mvhd["timescale"] - - # exit when both found - if movie_timescale is not None and points: - break - - if points and movie_timescale and media_timescale and elst_entries: - segments = [ - elst_entry_to_seconds(entry, movie_timescale, media_timescale) - for entry in elst_entries - ] - points = list(filter_points_by_elst(points, segments)) + points = [p for p in maybe_points if p is not None] + if points: + elst_boxdata = track.extract_elst_boxdata() + if elst_boxdata is not None: + elst_entries = elst_boxdata["entries"] + if elst_entries: + # media_timescale + mdhd_boxdata = track.extract_mdhd_boxdata() + media_timescale = mdhd_boxdata["timescale"] + # movie_timescale + mvhd_boxdata = moov.mvhd() + movie_timescale = mvhd_boxdata["timescale"] + segments = [ + elst_entry_to_seconds( + entry, + movie_timescale=movie_timescale, + media_timescale=media_timescale, + ) + for entry in elst_entries + ] + points = list(filter_points_by_elst(points, segments)) return points diff --git a/mapillary_tools/geotag/construct_mp4_parser.py b/mapillary_tools/geotag/construct_mp4_parser.py index 201a692f5..f11d6e0f0 100644 --- a/mapillary_tools/geotag/construct_mp4_parser.py +++ b/mapillary_tools/geotag/construct_mp4_parser.py @@ -592,8 +592,17 @@ def _new_cmap_without_boxes( def find_box_at_pathx( box: T.Union[T.Sequence[BoxDict], BoxDict], path: T.Sequence[bytes] ) -> BoxDict: - if not path: + found = find_box_at_path(box, path) + if found is None: raise ValueError(f"box at path {path} not found") + return found + + +def find_box_at_path( + box: T.Union[T.Sequence[BoxDict], BoxDict], path: T.Sequence[bytes] +) -> T.Optional[BoxDict]: + if not path: + return None boxes: T.Sequence[BoxDict] if isinstance(box, dict): @@ -605,12 +614,13 @@ def find_box_at_pathx( if box["type"] == path[0]: if len(path) == 1: return box - else: - box_data = T.cast(T.Sequence[BoxDict], box["data"]) - # ListContainer from construct is not sequence - assert isinstance( - box_data, T.Sequence - ), f"expect a list of boxes but got {type(box_data)} at path {path}" - return find_box_at_pathx(box_data, path[1:]) - - raise ValueError(f"box at path {path} not found") + box_data = T.cast(T.Sequence[BoxDict], box["data"]) + # ListContainer from construct is not sequence + assert isinstance( + box_data, T.Sequence + ), f"expect a list of boxes but got {type(box_data)} at path {path}" + found = find_box_at_path(box_data, path[1:]) + if found is not None: + return found + + return None From 90961fd643fd0a7cc832fc557383e5325d96d516 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 10 Aug 2023 18:27:21 -0400 Subject: [PATCH 07/14] refactor gpmf parser --- mapillary_tools/geotag/gpmf_parser.py | 69 ++++++++------------ mapillary_tools/geotag/simple_mp4_builder.py | 12 ++-- 2 files changed, 32 insertions(+), 49 deletions(-) diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index 3deb7fe00..439a8ab3d 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -11,7 +11,7 @@ import construct as C from .. import geo -from . import mp4_sample_parser as sample_parser, simple_mp4_parser as sparser +from . import mp4_sample_parser as sample_parser """ Parsing GPS from GPMF data format stored in GoPros. See the GPMF spec: https://github.com/gopro/gpmf-parser @@ -303,18 +303,25 @@ def _extract_points_from_samples( return values[0] if values else [] +def _is_gpmd_description(description: T.Dict) -> bool: + return description["format"] == b"gpmd" + + def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.PointWithFix]]: """ Return a list of points (could be empty) if it is a valid GoPro video, otherwise None """ points = None - for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): - trak_start_offset = s.tell() - descriptions = _extract_gpmd_descriptions_from_trak(s, h.maxsize) - if descriptions: - s.seek(trak_start_offset, io.SEEK_SET) - gpmd_samples = _extract_gpmd_samples_from_trak(s, h.maxsize) + moov = sample_parser.MovieBoxParser.parse_stream(fp) + for track in moov.parse_tracks(): + descriptions = track.parse_sample_descriptions() + if any(_is_gpmd_description(d) for d in descriptions): + gpmd_samples = ( + sample + for sample in track.parse_samples() + if _is_gpmd_description(sample.description) + ) points = list(_extract_points_from_samples(fp, gpmd_samples)) # return the firstly found non-empty points if points: @@ -323,34 +330,19 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.PointWithFix]]: return points -def _extract_gpmd_descriptions_from_trak( - s: T.BinaryIO, - maxsize: int = -1, -): - descriptions = sample_parser.parse_descriptions_from_trak(s, maxsize=maxsize) - return [d for d in descriptions if d["format"] == b"gpmd"] - - -def _extract_gpmd_samples_from_trak( - s: T.BinaryIO, - maxsize: int = -1, -) -> T.Generator[sample_parser.Sample, None, None]: - trak_start_offset = s.tell() - gpmd_descriptions = _extract_gpmd_descriptions_from_trak(s, maxsize=maxsize) - if gpmd_descriptions: - s.seek(trak_start_offset, io.SEEK_SET) - samples = sample_parser.parse_samples_from_trak_DEPRECATED(s, maxsize=maxsize) - for sample in samples: - if sample.description["format"] == b"gpmd": - yield sample - - def extract_all_device_names(fp: T.BinaryIO) -> T.Dict[int, bytes]: - for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): - gpmd_samples = _extract_gpmd_samples_from_trak(s, h.maxsize) - device_names = _extract_dvnm_from_samples(fp, gpmd_samples) - if device_names: - return device_names + moov = sample_parser.MovieBoxParser.parse_stream(fp) + for track in moov.parse_tracks(): + descriptions = track.parse_sample_descriptions() + if any(_is_gpmd_description(d) for d in descriptions): + gpmd_samples = ( + sample + for sample in track.parse_samples() + if _is_gpmd_description(sample.description) + ) + device_names = _extract_dvnm_from_samples(fp, gpmd_samples) + if device_names: + return device_names return {} @@ -391,12 +383,3 @@ def parse_gpx(path: pathlib.Path) -> T.List[geo.PointWithFix]: if points is None: return [] return points - - -def iterate_gpmd_sample_data(fp: T.BinaryIO) -> T.Generator[T.Dict, None, None]: - for h, s in sparser.parse_path(fp, [b"moov", b"trak"]): - gpmd_samples = _extract_gpmd_samples_from_trak(s, h.maxsize) - for sample in gpmd_samples: - fp.seek(sample.raw_sample.offset, io.SEEK_SET) - data = fp.read(sample.raw_sample.size) - yield T.cast(T.Dict, GPMFSampleData.parse(data)) diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index a3f097196..4b19605b5 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -15,8 +15,8 @@ Variable naming conventions: - *_box: a BoxDict -- *_boxes: a list of BoxDicts -- *_children: a list of BoxDicts under the parent box +- *_children: a list of child BoxDicts under the parent box +- *_boxdata: BoxDict["data"] - *_data: the data in bytes of a box (without the header (type and size)) - *_typed_data: the data in bytes of a box (with the header (type and size)) """ @@ -255,8 +255,8 @@ def _update_sbtl_sample_offsets(trak: BoxDict, sample_offset: int) -> int: ) sample_offset += sample.size stbl_box = cparser.find_box_at_pathx(trak, [b"trak", b"mdia", b"minf", b"stbl"]) - descriptions, _ = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( - io.BytesIO(T.cast(bytes, stbl_box["data"])) + descriptions, _ = sample_parser.parse_raw_samples_from_stbl_data( + T.cast(bytes, stbl_box["data"]) ) stbl_children_boxes = build_stbl_from_raw_samples( descriptions, repositioned_samples @@ -274,8 +274,8 @@ def iterate_samples( stbl_box = cparser.find_box_at_pathx( box, [b"trak", b"mdia", b"minf", b"stbl"] ) - _, raw_samples_iter = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( - io.BytesIO(T.cast(bytes, stbl_box["data"])) + _, raw_samples_iter = sample_parser.parse_raw_samples_from_stbl_data( + T.cast(bytes, stbl_box["data"]) ) yield from raw_samples_iter From 9abba003b75dcb58a2697fa0cb854309976db2a8 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 10 Aug 2023 18:48:36 -0400 Subject: [PATCH 08/14] rename parse_ to extract_ --- mapillary_tools/geotag/camm_parser.py | 8 ++++---- mapillary_tools/geotag/gpmf_parser.py | 12 ++++++------ mapillary_tools/geotag/mp4_sample_parser.py | 20 ++++++++++---------- mapillary_tools/geotag/simple_mp4_builder.py | 4 ++-- mapillary_tools/sample_video.py | 4 ++-- tests/unit/test_mp4_sample_parser.py | 14 +++++++------- tests/unit/test_simple_mp4_builder.py | 4 ++-- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/mapillary_tools/geotag/camm_parser.py b/mapillary_tools/geotag/camm_parser.py index 777d59f7f..406985fa2 100644 --- a/mapillary_tools/geotag/camm_parser.py +++ b/mapillary_tools/geotag/camm_parser.py @@ -160,12 +160,12 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: points = None moov = sample_parser.MovieBoxParser.parse_stream(fp) - for track in moov.parse_tracks(): - descriptions = track.parse_sample_descriptions() + for track in moov.extract_tracks(): + descriptions = track.extract_sample_descriptions() if any(_is_camm_description(d) for d in descriptions): maybe_points = ( _parse_point_from_sample(fp, sample) - for sample in track.parse_samples() + for sample in track.extract_samples() if _is_camm_description(sample.description) ) points = [p for p in maybe_points if p is not None] @@ -178,7 +178,7 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]: mdhd_boxdata = track.extract_mdhd_boxdata() media_timescale = mdhd_boxdata["timescale"] # movie_timescale - mvhd_boxdata = moov.mvhd() + mvhd_boxdata = moov.extract_mvhd_boxdata() movie_timescale = mvhd_boxdata["timescale"] segments = [ elst_entry_to_seconds( diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index 439a8ab3d..11d169048 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -314,12 +314,12 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.PointWithFix]]: """ points = None moov = sample_parser.MovieBoxParser.parse_stream(fp) - for track in moov.parse_tracks(): - descriptions = track.parse_sample_descriptions() + for track in moov.extract_tracks(): + descriptions = track.extract_sample_descriptions() if any(_is_gpmd_description(d) for d in descriptions): gpmd_samples = ( sample - for sample in track.parse_samples() + for sample in track.extract_samples() if _is_gpmd_description(sample.description) ) points = list(_extract_points_from_samples(fp, gpmd_samples)) @@ -332,12 +332,12 @@ def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.PointWithFix]]: def extract_all_device_names(fp: T.BinaryIO) -> T.Dict[int, bytes]: moov = sample_parser.MovieBoxParser.parse_stream(fp) - for track in moov.parse_tracks(): - descriptions = track.parse_sample_descriptions() + for track in moov.extract_tracks(): + descriptions = track.extract_sample_descriptions() if any(_is_gpmd_description(d) for d in descriptions): gpmd_samples = ( sample - for sample in track.parse_samples() + for sample in track.extract_samples() if _is_gpmd_description(sample.description) ) device_names = _extract_dvnm_from_samples(fp, gpmd_samples) diff --git a/mapillary_tools/geotag/mp4_sample_parser.py b/mapillary_tools/geotag/mp4_sample_parser.py index 79dcc7d86..78e85f116 100644 --- a/mapillary_tools/geotag/mp4_sample_parser.py +++ b/mapillary_tools/geotag/mp4_sample_parser.py @@ -150,7 +150,7 @@ def _extract_samples( ).BoxList -def parse_raw_samples_from_stbl_data( +def extract_raw_samples_from_stbl_data( stbl: bytes, ) -> T.Tuple[T.List[T.Dict], T.Generator[RawSample, None, None]]: descriptions = [] @@ -225,7 +225,7 @@ def __init__(self, trak_children: T.Sequence[cparser.BoxDict]): ) self.stbl_data = T.cast(bytes, stbl["data"]) - def tkhd(self) -> T.Dict: + def extract_tkhd_boxdata(self) -> T.Dict: return T.cast( T.Dict, cparser.find_box_at_pathx(self.trak_children, [b"tkhd"])["data"] ) @@ -234,7 +234,7 @@ def is_video_track(self) -> bool: hdlr = cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"hdlr"]) return T.cast(T.Dict[str, T.Any], hdlr["data"])["handler_type"] == b"vide" - def parse_sample_descriptions(self) -> T.List[T.Dict]: + def extract_sample_descriptions(self) -> T.List[T.Dict]: # TODO: return [] if parsing fail boxes = _STSDBoxListConstruct.parse(self.stbl_data) stsd = cparser.find_box_at_pathx( @@ -252,12 +252,12 @@ def extract_mdhd_boxdata(self) -> T.Dict: box = cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"mdhd"]) return T.cast(T.Dict, box["data"]) - def parse_raw_samples(self) -> T.Generator[RawSample, None, None]: - _, raw_samples = parse_raw_samples_from_stbl_data(self.stbl_data) + def extract_raw_samples(self) -> T.Generator[RawSample, None, None]: + _, raw_samples = extract_raw_samples_from_stbl_data(self.stbl_data) yield from raw_samples - def parse_samples(self) -> T.Generator[Sample, None, None]: - descriptions, raw_samples = parse_raw_samples_from_stbl_data(self.stbl_data) + def extract_samples(self) -> T.Generator[Sample, None, None]: + descriptions, raw_samples = extract_raw_samples_from_stbl_data(self.stbl_data) mdhd = T.cast( T.Dict, cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"mdhd"])["data"], @@ -285,16 +285,16 @@ def parse_stream(cls, stream: T.BinaryIO) -> "MovieBoxParser": moov = sparser.parse_box_data_firstx(stream, [b"moov"]) return MovieBoxParser(moov) - def mvhd(self) -> T.Dict: + def extract_mvhd_boxdata(self) -> T.Dict: mvhd = cparser.find_box_at_pathx(self.moov_children, [b"mvhd"]) return T.cast(T.Dict, mvhd["data"]) - def parse_tracks(self) -> T.Generator[TrackBoxParser, None, None]: + def extract_tracks(self) -> T.Generator[TrackBoxParser, None, None]: for box in self.moov_children: if box["type"] == b"trak": yield TrackBoxParser(T.cast(T.Sequence[cparser.BoxDict], box["data"])) - def parse_track_at(self, stream_idx: int) -> TrackBoxParser: + def extract_track_at(self, stream_idx: int) -> TrackBoxParser: """ stream_idx should be the stream_index specifier. See http://ffmpeg.org/ffmpeg.html#Stream-specifiers-1 > Stream numbering is based on the order of the streams as detected by libavformat diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index 4b19605b5..38c7dd308 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -255,7 +255,7 @@ def _update_sbtl_sample_offsets(trak: BoxDict, sample_offset: int) -> int: ) sample_offset += sample.size stbl_box = cparser.find_box_at_pathx(trak, [b"trak", b"mdia", b"minf", b"stbl"]) - descriptions, _ = sample_parser.parse_raw_samples_from_stbl_data( + descriptions, _ = sample_parser.extract_raw_samples_from_stbl_data( T.cast(bytes, stbl_box["data"]) ) stbl_children_boxes = build_stbl_from_raw_samples( @@ -274,7 +274,7 @@ def iterate_samples( stbl_box = cparser.find_box_at_pathx( box, [b"trak", b"mdia", b"minf", b"stbl"] ) - _, raw_samples_iter = sample_parser.parse_raw_samples_from_stbl_data( + _, raw_samples_iter = sample_parser.extract_raw_samples_from_stbl_data( T.cast(bytes, stbl_box["data"]) ) yield from raw_samples_iter diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index 6c1d80afd..b4ba18f2e 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -234,7 +234,7 @@ def _sample_video_stream_by_distance( """ LOG.info("Extracting video samples") - sorted_samples = list(video_track_parser.parse_samples()) + sorted_samples = list(video_track_parser.extract_samples()) # we need sort sampels by composition time (CT) not the decoding offset (DT) # CT is the oder of videos streaming to audiences, as well as the order ffmpeg sampling sorted_samples.sort(key=lambda sample: sample.exact_composition_time) @@ -316,7 +316,7 @@ def _sample_single_video_by_distance( LOG.info("Extracting video samples") video_stream_idx = video_stream["index"] moov_parser = mp4_sample_parser.MovieBoxParser.parse_file(video_path) - video_track_parser = moov_parser.parse_track_at(video_stream_idx) + video_track_parser = moov_parser.extract_track_at(video_stream_idx) sample_points_by_frame_idx = _sample_video_stream_by_distance( video_metadata.points, video_track_parser, sample_distance ) diff --git a/tests/unit/test_mp4_sample_parser.py b/tests/unit/test_mp4_sample_parser.py index 1b08bdc6e..360cb9678 100644 --- a/tests/unit/test_mp4_sample_parser.py +++ b/tests/unit/test_mp4_sample_parser.py @@ -7,13 +7,13 @@ def test_movie_box_parser(): moov_parser = mp4_sample_parser.MovieBoxParser.parse_file( Path("tests/data/videos/sample-5s.mp4") ) - assert 2 == len(list(moov_parser.parse_tracks())) - video_track = moov_parser.parse_track_at(0) + assert 2 == len(list(moov_parser.extract_tracks())) + video_track = moov_parser.extract_track_at(0) assert video_track.is_video_track() - aac_track = moov_parser.parse_track_at(1) + aac_track = moov_parser.extract_track_at(1) assert not aac_track.is_video_track() - samples = list(video_track.parse_samples()) - raw_samples = list(video_track.parse_raw_samples()) + samples = list(video_track.extract_samples()) + raw_samples = list(video_track.extract_raw_samples()) assert 171 == len(samples) assert len(samples) == len(raw_samples) assert { @@ -31,7 +31,7 @@ def test_movie_box_parser(): "height": 70778880, } == { k: v - for k, v in video_track.tkhd().items() + for k, v in video_track.extract_tkhd_boxdata().items() if k in [ "version", @@ -47,7 +47,7 @@ def test_movie_box_parser(): "height", ] } - assert isinstance(video_track.tkhd(), dict) + assert isinstance(video_track.extract_tkhd_boxdata(), dict) for sample, raw_sample in zip(samples, raw_samples): assert sample.raw_sample.offset == raw_sample.offset assert sample.raw_sample.is_sync == raw_sample.is_sync diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index 27cbbd84c..03e6b664b 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -50,7 +50,7 @@ def _build_and_parse_stbl( io.BytesIO(ss) ) assert expected_samples == list(parsed_samples) - _, parsed_samples = sample_parser.parse_raw_samples_from_stbl_data(ss) + _, parsed_samples = sample_parser.extract_raw_samples_from_stbl_data(ss) assert expected_samples == list(parsed_samples) @@ -330,7 +330,7 @@ def _verify_samples(descs, samples): samples = list(sample_iter) _verify_samples(descs, samples) - descs, sample_iter = sample_parser.parse_raw_samples_from_stbl_data(stbl_bytes) + descs, sample_iter = sample_parser.extract_raw_samples_from_stbl_data(stbl_bytes) samples = list(sample_iter) _verify_samples(descs, samples) From 730fbae3d06d6e9532a789d9d9e183ad8ffc6721 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 18 Nov 2024 17:43:40 -0800 Subject: [PATCH 09/14] remove deprecated functions --- tests/unit/test_simple_mp4_builder.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index 03e6b664b..7a1db211a 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -46,10 +46,6 @@ def _build_and_parse_stbl( ) ss = sparser.parse_box_data_firstx(io.BytesIO(d), [b"stbl"]) assert d[8:] == ss - _, parsed_samples = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( - io.BytesIO(ss) - ) - assert expected_samples == list(parsed_samples) _, parsed_samples = sample_parser.extract_raw_samples_from_stbl_data(ss) assert expected_samples == list(parsed_samples) @@ -324,12 +320,6 @@ def _verify_samples(descs, samples): {"data": {"entries": [1, 3]}, "type": b"stss"}, ] - descs, sample_iter = sample_parser.parse_raw_samples_from_stbl_DEPRECATED( - io.BytesIO(stbl_bytes) - ) - samples = list(sample_iter) - _verify_samples(descs, samples) - descs, sample_iter = sample_parser.extract_raw_samples_from_stbl_data(stbl_bytes) samples = list(sample_iter) _verify_samples(descs, samples) From f400b95b0abc5b0c7c4c703acb1e82a70834362f Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 27 Nov 2024 14:46:25 -0800 Subject: [PATCH 10/14] rename composition_timedelta to composition_offset --- mapillary_tools/geotag/camm_builder.py | 2 +- mapillary_tools/geotag/mp4_sample_parser.py | 36 ++++++++++---------- mapillary_tools/geotag/simple_mp4_builder.py | 10 +++--- tests/unit/test_simple_mp4_builder.py | 30 ++++++++-------- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/mapillary_tools/geotag/camm_builder.py b/mapillary_tools/geotag/camm_builder.py index 53740d2c2..5ff61e35b 100644 --- a/mapillary_tools/geotag/camm_builder.py +++ b/mapillary_tools/geotag/camm_builder.py @@ -104,7 +104,7 @@ def convert_points_to_raw_samples( offset=0, size=len(camm_sample_data), timedelta=timedelta, - composition_timedelta=0, + composition_offset=0, is_sync=True, ) diff --git a/mapillary_tools/geotag/mp4_sample_parser.py b/mapillary_tools/geotag/mp4_sample_parser.py index 78e85f116..1cebd682b 100644 --- a/mapillary_tools/geotag/mp4_sample_parser.py +++ b/mapillary_tools/geotag/mp4_sample_parser.py @@ -23,7 +23,7 @@ class RawSample(T.NamedTuple): # sample composition offset that decides when to present the sample, # i.e. CTTS(n) in the forumula CT(n) = DT(n) + CTTS(n). # NOTE: timescale is not applied yet (hence int) - composition_timedelta: int + composition_offset: int # if it is a sync sample is_sync: bool @@ -50,7 +50,7 @@ def _extract_raw_samples( chunk_entries: T.Sequence[T.Dict], chunk_offsets: T.Sequence[int], timedeltas: T.Sequence[int], - composition_timedeltas: T.Optional[T.Sequence[int]], + composition_offsets: T.Optional[T.Sequence[int]], syncs: T.Optional[T.Set[int]], ) -> T.Generator[RawSample, None, None]: if not sizes: @@ -81,9 +81,9 @@ def _extract_raw_samples( # iterate samples in this chunk for _ in range(entry["samples_per_chunk"]): is_sync = syncs is None or (sample_idx + 1) in syncs - composition_timedelta = ( - composition_timedeltas[sample_idx] - if composition_timedeltas is not None + composition_offset = ( + composition_offsets[sample_idx] + if composition_offsets is not None else 0 ) yield RawSample( @@ -91,7 +91,7 @@ def _extract_raw_samples( offset=sample_offset, size=sizes[sample_idx], timedelta=timedeltas[sample_idx], - composition_timedelta=composition_timedelta, + composition_offset=composition_offset, is_sync=is_sync, ) sample_offset += sizes[sample_idx] @@ -108,9 +108,9 @@ def _extract_raw_samples( # iterate samples in this chunk for _ in range(chunk_entries[-1]["samples_per_chunk"]): is_sync = syncs is None or (sample_idx + 1) in syncs - composition_timedelta = ( - composition_timedeltas[sample_idx] - if composition_timedeltas is not None + composition_offset = ( + composition_offsets[sample_idx] + if composition_offsets is not None else 0 ) yield RawSample( @@ -118,7 +118,7 @@ def _extract_raw_samples( offset=sample_offset, size=sizes[sample_idx], timedelta=timedeltas[sample_idx], - composition_timedelta=composition_timedelta, + composition_offset=composition_offset, is_sync=is_sync, ) sample_offset += sizes[sample_idx] @@ -139,7 +139,7 @@ def _extract_samples( exact_time=acc_delta / timescale, exact_timedelta=raw_sample.timedelta / timescale, # CT(n) = DT(n) + CTTS(n) - exact_composition_time=(acc_delta + raw_sample.composition_timedelta) + exact_composition_time=(acc_delta + raw_sample.composition_offset) / timescale, ) acc_delta += raw_sample.timedelta @@ -158,7 +158,7 @@ def extract_raw_samples_from_stbl_data( chunk_offsets = [] chunk_entries = [] timedeltas: T.List[int] = [] - composition_timedeltas: T.Optional[T.List[int]] = None + composition_offsets: T.Optional[T.List[int]] = None syncs: T.Optional[T.Set[int]] = None stbl_children = T.cast( @@ -187,10 +187,10 @@ def extract_raw_samples_from_stbl_data( for _ in range(entry["sample_count"]): timedeltas.append(entry["sample_delta"]) elif box["type"] == b"ctts": - composition_timedeltas = [] + composition_offsets = [] for entry in data["entries"]: for _ in range(entry["sample_count"]): - composition_timedeltas.append(entry["sample_offset"]) + composition_offsets.append(entry["sample_offset"]) elif box["type"] == b"stss": syncs = set(data["entries"]) @@ -198,12 +198,12 @@ def extract_raw_samples_from_stbl_data( # in this case append 0's to timedeltas while len(timedeltas) < len(sizes): timedeltas.append(0) - if composition_timedeltas is not None: - while len(composition_timedeltas) < len(sizes): - composition_timedeltas.append(0) + if composition_offsets is not None: + while len(composition_offsets) < len(sizes): + composition_offsets.append(0) raw_samples = _extract_raw_samples( - sizes, chunk_entries, chunk_offsets, timedeltas, composition_timedeltas, syncs + sizes, chunk_entries, chunk_offsets, timedeltas, composition_offsets, syncs ) return descriptions, raw_samples diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index 38c7dd308..632aecca0 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -144,10 +144,10 @@ class _CompressedSampleCompositionOffset: sample_offset: int -def _build_ctts(sample_composition_timedeltas: T.Iterable[int]) -> BoxDict: +def _build_ctts(sample_composition_offsets: T.Iterable[int]) -> BoxDict: # compress offsets compressed: T.List[_CompressedSampleCompositionOffset] = [] - for offset in sample_composition_timedeltas: + for offset in sample_composition_offsets: if compressed and offset == compressed[-1].sample_offset: compressed[-1].sample_count += 1 else: @@ -197,8 +197,8 @@ def build_stbl_from_raw_samples( # so we can calculate the moov box size in advance _build_co64(raw_samples), ] - if any(s.composition_timedelta for s in raw_samples): - boxes.append(_build_ctts((s.composition_timedelta for s in raw_samples))) + if any(s.composition_offset for s in raw_samples): + boxes.append(_build_ctts((s.composition_offset for s in raw_samples))) if any(not s.is_sync for s in raw_samples): boxes.append(_build_stss((s.is_sync for s in raw_samples))) return boxes @@ -249,7 +249,7 @@ def _update_sbtl_sample_offsets(trak: BoxDict, sample_offset: int) -> int: offset=sample_offset, size=sample.size, timedelta=sample.timedelta, - composition_timedelta=sample.composition_timedelta, + composition_offset=sample.composition_offset, is_sync=sample.is_sync, ) ) diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index 7a1db211a..80d971d5d 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -62,7 +62,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), sample_parser.RawSample( @@ -70,7 +70,7 @@ def test_build_stbl_happy(): offset=2, size=9, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=False, ), ] @@ -82,7 +82,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), sample_parser.RawSample( @@ -90,7 +90,7 @@ def test_build_stbl_happy(): offset=2, size=2, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=False, ), # another chunk here due to a 1-byte break @@ -99,7 +99,7 @@ def test_build_stbl_happy(): offset=5, size=1, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), sample_parser.RawSample( @@ -107,7 +107,7 @@ def test_build_stbl_happy(): offset=6, size=9, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=False, ), ] @@ -119,7 +119,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=False, ), sample_parser.RawSample( @@ -127,7 +127,7 @@ def test_build_stbl_happy(): offset=2, size=2, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), # another chunk here @@ -136,7 +136,7 @@ def test_build_stbl_happy(): offset=4, size=1, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), # another chunk here @@ -145,7 +145,7 @@ def test_build_stbl_happy(): offset=5, size=9, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), ] @@ -157,7 +157,7 @@ def test_build_stbl_happy(): offset=1, size=1, timedelta=2, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), ] @@ -255,7 +255,7 @@ def _verify_samples(descs, samples): offset=1, size=1, timedelta=20, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), sample_parser.RawSample( @@ -263,7 +263,7 @@ def _verify_samples(descs, samples): offset=2, size=2, timedelta=30, - composition_timedelta=0, + composition_offset=0, is_sync=False, ), sample_parser.RawSample( @@ -271,7 +271,7 @@ def _verify_samples(descs, samples): offset=5, size=3, timedelta=30, - composition_timedelta=0, + composition_offset=0, is_sync=True, ), sample_parser.RawSample( @@ -279,7 +279,7 @@ def _verify_samples(descs, samples): offset=8, size=3, timedelta=50, - composition_timedelta=0, + composition_offset=0, is_sync=False, ), ] == samples From a57b42a118aa2412ce5551a07859877e89d56475 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 27 Nov 2024 15:12:20 -0800 Subject: [PATCH 11/14] move mp4 to a separate module --- mapillary_tools/geotag/blackvue_parser.py | 2 +- mapillary_tools/geotag/camm_builder.py | 6 ++++-- mapillary_tools/geotag/camm_parser.py | 7 ++----- mapillary_tools/geotag/geotag_videos_from_video.py | 2 +- mapillary_tools/geotag/gpmf_parser.py | 2 +- mapillary_tools/geotag/simple_mp4_builder.py | 8 ++++---- mapillary_tools/{geotag => mp4}/construct_mp4_parser.py | 0 mapillary_tools/{geotag => mp4}/mp4_sample_parser.py | 0 mapillary_tools/{geotag => mp4}/simple_mp4_parser.py | 0 mapillary_tools/sample_video.py | 3 ++- .../video_data_extraction/extractors/blackvue_parser.py | 9 +++++---- .../video_data_extraction/extractors/camm_parser.py | 9 +++++---- .../video_data_extraction/extractors/gopro_parser.py | 9 +++++---- tests/unit/test_blackvue_parser.py | 3 ++- tests/unit/test_camm_parser.py | 2 +- tests/unit/test_mp4_sample_parser.py | 2 +- tests/unit/test_simple_mp4_builder.py | 4 +++- tests/unit/test_simple_mp4_parser.py | 2 +- 18 files changed, 38 insertions(+), 32 deletions(-) rename mapillary_tools/{geotag => mp4}/construct_mp4_parser.py (100%) rename mapillary_tools/{geotag => mp4}/mp4_sample_parser.py (100%) rename mapillary_tools/{geotag => mp4}/simple_mp4_parser.py (100%) diff --git a/mapillary_tools/geotag/blackvue_parser.py b/mapillary_tools/geotag/blackvue_parser.py index 812dc70a3..99fc92ba3 100644 --- a/mapillary_tools/geotag/blackvue_parser.py +++ b/mapillary_tools/geotag/blackvue_parser.py @@ -7,7 +7,7 @@ import pynmea2 from .. import geo -from . import simple_mp4_parser as sparser +from ..mp4 import simple_mp4_parser as sparser LOG = logging.getLogger(__name__) diff --git a/mapillary_tools/geotag/camm_builder.py b/mapillary_tools/geotag/camm_builder.py index 5ff61e35b..5a013f578 100644 --- a/mapillary_tools/geotag/camm_builder.py +++ b/mapillary_tools/geotag/camm_builder.py @@ -2,11 +2,13 @@ import typing as T from .. import geo, types +from ..mp4 import ( + construct_mp4_parser as cparser, + mp4_sample_parser as sample_parser, +) from . import ( camm_parser, - construct_mp4_parser as cparser, - mp4_sample_parser as sample_parser, simple_mp4_builder as builder, ) from .simple_mp4_builder import BoxDict diff --git a/mapillary_tools/geotag/camm_parser.py b/mapillary_tools/geotag/camm_parser.py index 406985fa2..f93b7ffd4 100644 --- a/mapillary_tools/geotag/camm_parser.py +++ b/mapillary_tools/geotag/camm_parser.py @@ -9,11 +9,8 @@ import construct as C -from . import ( - geo, - mp4_sample_parser as sample_parser, - simple_mp4_parser as sparser, -) +from . import geo +from ..mp4 import simple_mp4_parser as sparser, mp4_sample_parser as sample_parser LOG = logging.getLogger(__name__) diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 42846ffc0..d1d31c0d8 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -12,9 +12,9 @@ camm_parser, gpmf_gps_filter, gpmf_parser, - simple_mp4_parser as sparser, utils as video_utils, ) +from ..mp4 import simple_mp4_parser as sparser from .geotag_from_generic import GeotagVideosFromGeneric LOG = logging.getLogger(__name__) diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index 211164b19..7feaf7134 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -5,7 +5,7 @@ import construct as C from .. import geo -from . import mp4_sample_parser as sample_parser +from ..mp4 import mp4_sample_parser as sample_parser """ Parsing GPS from GPMF data format stored in GoPros. See the GPMF spec: https://github.com/gopro/gpmf-parser diff --git a/mapillary_tools/geotag/simple_mp4_builder.py b/mapillary_tools/geotag/simple_mp4_builder.py index 632aecca0..6946f3102 100644 --- a/mapillary_tools/geotag/simple_mp4_builder.py +++ b/mapillary_tools/geotag/simple_mp4_builder.py @@ -2,14 +2,14 @@ import io import typing as T -from . import ( +from . import io_utils +from ..mp4 import ( construct_mp4_parser as cparser, - io_utils, mp4_sample_parser as sample_parser, simple_mp4_parser as sparser, ) -from .construct_mp4_parser import BoxDict -from .mp4_sample_parser import RawSample +from ..mp4.construct_mp4_parser import BoxDict +from ..mp4.mp4_sample_parser import RawSample """ Variable naming conventions: diff --git a/mapillary_tools/geotag/construct_mp4_parser.py b/mapillary_tools/mp4/construct_mp4_parser.py similarity index 100% rename from mapillary_tools/geotag/construct_mp4_parser.py rename to mapillary_tools/mp4/construct_mp4_parser.py diff --git a/mapillary_tools/geotag/mp4_sample_parser.py b/mapillary_tools/mp4/mp4_sample_parser.py similarity index 100% rename from mapillary_tools/geotag/mp4_sample_parser.py rename to mapillary_tools/mp4/mp4_sample_parser.py diff --git a/mapillary_tools/geotag/simple_mp4_parser.py b/mapillary_tools/mp4/simple_mp4_parser.py similarity index 100% rename from mapillary_tools/geotag/simple_mp4_parser.py rename to mapillary_tools/mp4/simple_mp4_parser.py diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index bc9cd0788..65d1baa72 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -9,7 +9,8 @@ from . import constants, exceptions, ffmpeg as ffmpeglib, geo, types, utils from .exif_write import ExifEdit -from .geotag import geotag_videos_from_video, mp4_sample_parser +from .geotag import geotag_videos_from_video +from .mp4 import mp4_sample_parser from .process_geotag_properties import GeotagSource LOG = logging.getLogger(__name__) diff --git a/mapillary_tools/video_data_extraction/extractors/blackvue_parser.py b/mapillary_tools/video_data_extraction/extractors/blackvue_parser.py index 7f088677a..9aef060f4 100644 --- a/mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +++ b/mapillary_tools/video_data_extraction/extractors/blackvue_parser.py @@ -1,8 +1,9 @@ import typing as T -from mapillary_tools import geo -from mapillary_tools.geotag import blackvue_parser, simple_mp4_parser -from mapillary_tools.video_data_extraction.extractors.base_parser import BaseParser +from ... import geo +from ...geotag import blackvue_parser +from ...mp4 import simple_mp4_parser as sparser +from .base_parser import BaseParser class BlackVueParser(BaseParser): @@ -21,7 +22,7 @@ def extract_points(self) -> T.Sequence[geo.Point]: points = blackvue_parser.extract_points(fp) or [] self.pointsFound = len(points) > 0 return points - except simple_mp4_parser.ParsingError: + except sparser.ParsingError: return [] def extract_make(self) -> T.Optional[str]: diff --git a/mapillary_tools/video_data_extraction/extractors/camm_parser.py b/mapillary_tools/video_data_extraction/extractors/camm_parser.py index 98e0b8d69..122a0ca5f 100644 --- a/mapillary_tools/video_data_extraction/extractors/camm_parser.py +++ b/mapillary_tools/video_data_extraction/extractors/camm_parser.py @@ -1,9 +1,10 @@ import functools import typing as T -from mapillary_tools import geo -from mapillary_tools.geotag import camm_parser, simple_mp4_parser -from mapillary_tools.video_data_extraction.extractors.base_parser import BaseParser +from ... import geo +from ...geotag import camm_parser +from ...mp4 import simple_mp4_parser as sparser +from .base_parser import BaseParser class CammParser(BaseParser): @@ -23,7 +24,7 @@ def extract_points(self) -> T.Sequence[geo.Point]: with source_path.open("rb") as fp: try: return camm_parser.extract_points(fp) or [] - except simple_mp4_parser.ParsingError: + except sparser.ParsingError: return [] def extract_make(self) -> T.Optional[str]: diff --git a/mapillary_tools/video_data_extraction/extractors/gopro_parser.py b/mapillary_tools/video_data_extraction/extractors/gopro_parser.py index 3a4c3efde..77e488ad3 100644 --- a/mapillary_tools/video_data_extraction/extractors/gopro_parser.py +++ b/mapillary_tools/video_data_extraction/extractors/gopro_parser.py @@ -1,8 +1,9 @@ import typing as T -from mapillary_tools import geo -from mapillary_tools.geotag import gpmf_parser, simple_mp4_parser -from mapillary_tools.video_data_extraction.extractors.base_parser import BaseParser +from ... import geo +from ...geotag import gpmf_parser +from ...mp4 import simple_mp4_parser as sparser +from .base_parser import BaseParser class GoProParser(BaseParser): @@ -21,7 +22,7 @@ def extract_points(self) -> T.Sequence[geo.Point]: points = gpmf_parser.extract_points(fp) or [] self.pointsFound = len(points) > 0 return points - except simple_mp4_parser.ParsingError: + except sparser.ParsingError: return [] def extract_make(self) -> T.Optional[str]: diff --git a/tests/unit/test_blackvue_parser.py b/tests/unit/test_blackvue_parser.py index 9ec65a450..0832a739f 100644 --- a/tests/unit/test_blackvue_parser.py +++ b/tests/unit/test_blackvue_parser.py @@ -2,7 +2,8 @@ import mapillary_tools.geo as geo -from mapillary_tools.geotag import blackvue_parser, construct_mp4_parser as cparser +from mapillary_tools.geotag import blackvue_parser +from mapillary_tools.mp4 import construct_mp4_parser as cparser def test_parse_points(): diff --git a/tests/unit/test_camm_parser.py b/tests/unit/test_camm_parser.py index ca22b5716..ed3237837 100644 --- a/tests/unit/test_camm_parser.py +++ b/tests/unit/test_camm_parser.py @@ -7,9 +7,9 @@ from mapillary_tools.geotag import ( camm_builder, camm_parser, - construct_mp4_parser as cparser, simple_mp4_builder, ) +from mapillary_tools.mp4 import construct_mp4_parser as cparser def test_filter_points_by_edit_list(): diff --git a/tests/unit/test_mp4_sample_parser.py b/tests/unit/test_mp4_sample_parser.py index 360cb9678..6e561fcb0 100644 --- a/tests/unit/test_mp4_sample_parser.py +++ b/tests/unit/test_mp4_sample_parser.py @@ -1,6 +1,6 @@ from pathlib import Path -from mapillary_tools.geotag import mp4_sample_parser +from mapillary_tools.mp4 import mp4_sample_parser def test_movie_box_parser(): diff --git a/tests/unit/test_simple_mp4_builder.py b/tests/unit/test_simple_mp4_builder.py index 80d971d5d..8bd67e7d5 100644 --- a/tests/unit/test_simple_mp4_builder.py +++ b/tests/unit/test_simple_mp4_builder.py @@ -2,9 +2,11 @@ import typing as T from mapillary_tools.geotag import ( + simple_mp4_builder as builder, +) +from mapillary_tools.mp4 import ( construct_mp4_parser as cparser, mp4_sample_parser as sample_parser, - simple_mp4_builder as builder, simple_mp4_parser as sparser, ) diff --git a/tests/unit/test_simple_mp4_parser.py b/tests/unit/test_simple_mp4_parser.py index eaeb7142b..5b375842e 100644 --- a/tests/unit/test_simple_mp4_parser.py +++ b/tests/unit/test_simple_mp4_parser.py @@ -1,7 +1,7 @@ import io import typing -from mapillary_tools.geotag import ( +from mapillary_tools.mp4 import ( construct_mp4_parser as cparser, simple_mp4_parser as sparser, ) From 88a0bd98e4fb56f7330ab058f8c43f167c96ec39 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 27 Nov 2024 16:28:45 -0800 Subject: [PATCH 12/14] add the missing __init__.py --- mapillary_tools/mp4/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 mapillary_tools/mp4/__init__.py diff --git a/mapillary_tools/mp4/__init__.py b/mapillary_tools/mp4/__init__.py new file mode 100644 index 000000000..e69de29bb From 55de501807f1a0b43b6bc6777708d4ff1e690924 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 27 Nov 2024 16:39:56 -0800 Subject: [PATCH 13/14] fix import --- tests/cli/simple_mp4_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cli/simple_mp4_parser.py b/tests/cli/simple_mp4_parser.py index 74f24056d..da8e3f294 100644 --- a/tests/cli/simple_mp4_parser.py +++ b/tests/cli/simple_mp4_parser.py @@ -6,7 +6,7 @@ import typing as T from mapillary_tools import utils -from mapillary_tools.geotag import ( +from mapillary_tools.mp4 import ( construct_mp4_parser as cparser, mp4_sample_parser as sample_parser, simple_mp4_parser as sparser, From 8a4594f5637beb954852053cd43dad1c93260b85 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 27 Nov 2024 17:13:24 -0800 Subject: [PATCH 14/14] update setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 74b9a348f..2d09b2950 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ def readme(): "mapillary_tools", "mapillary_tools.commands", "mapillary_tools.geotag", + "mapillary_tools.mp4", "mapillary_tools.video_data_extraction", "mapillary_tools.video_data_extraction.extractors", ],