diff --git a/mmif/serialize/annotation.py b/mmif/serialize/annotation.py index 3e6f91d7..a371d450 100644 --- a/mmif/serialize/annotation.py +++ b/mmif/serialize/annotation.py @@ -27,29 +27,36 @@ class Annotation(FreezableMmifObject): """ def __init__(self, anno_obj: Union[bytes, str, dict] = None) -> None: - self._type: Union[str, ThingTypesBase] = '' + self._type: ThingTypesBase = ThingTypesBase('') if not hasattr(self, 'properties'): # don't overwrite DocumentProperties on super() call self.properties: AnnotationProperties = AnnotationProperties() self._attribute_classes = pmap({'properties': AnnotationProperties}) self.disallow_additional_properties() self._required_attributes = pvector(["_type", "properties"]) super().__init__(anno_obj) - - def is_type(self, type: Union[str, ThingTypesBase]) -> bool: + + def _deserialize(self, input_dict: dict) -> None: + self.at_type = input_dict.pop('_type') + super()._deserialize(input_dict) + + def is_type(self, at_type: Union[str, ThingTypesBase]) -> bool: """ Check if the @type of this object matches. """ - return str(self.at_type) == str(type) + return self.at_type == at_type @property - def at_type(self) -> Union[str, ThingTypesBase]: + def at_type(self) -> ThingTypesBase: # TODO (krim @ 8/19/20): should we always return string? leaving this to return # different types can be confusing for sdk users. return self._type @at_type.setter def at_type(self, at_type: Union[str, ThingTypesBase]) -> None: - self._type = at_type + if isinstance(at_type, str): + self._type = ThingTypesBase.from_str(at_type) + else: + self._type = at_type @property def id(self) -> str: @@ -85,7 +92,7 @@ def add_property(self, name: str, f"(\"{name}\": \"{str(value)}\"") def is_document(self): - return self.at_type.endswith("Document") + return isinstance(self.at_type, DocumentTypesBase) class Document(Annotation): diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 690662ab..7ad9470c 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -265,6 +265,8 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s :return: a dict that keyed by view IDs (str) and has lists of alignment Annotation objects as values. """ v_and_a = {} + # at_type1 = ThingTypesBase.from_str(at_type1) if isinstance(at_type1, str) else at_type1 + # at_type2 = ThingTypesBase.from_str(at_type2) if isinstance(at_type2, str) else at_type2 for alignment_view in self.get_all_views_contain(AnnotationTypes.Alignment): alignments = [] # TODO (krim @ 11/7/20): maybe Alignment can have metadata on what types are aligned? @@ -274,10 +276,12 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s ann_id = cast(str, ann_id) if ':' in ann_id: view_id, ann_id = ann_id.split(':') - aligned_types.add(str(cast(Annotation, self[view_id][ann_id]).at_type)) + aligned_type = cast(Annotation, self[view_id][ann_id]).at_type else: - aligned_types.add(str(cast(Annotation, alignment_view[ann_id]).at_type)) - if str(at_type1) in aligned_types and str(at_type2) in aligned_types: + aligned_type = cast(Annotation, alignment_view[ann_id]).at_type + aligned_types.add(aligned_type) + aligned_types = list(aligned_types) # because membership check for sets also checks hash() values + if at_type1 in aligned_types and at_type2 in aligned_types: alignments.append(alignment) if len(alignments) > 0: v_and_a[alignment_view.id] = alignments @@ -319,9 +323,9 @@ def get_all_views_contain(self, at_types: Union[ThingTypesBase, str, List[Union[ """ if isinstance(at_types, list): return [view for view in self.views - if all(map(lambda x: str(x) in view.metadata.contains, at_types))] + if all(map(lambda x: x in view.metadata.contains, at_types))] else: - return [view for view in self.views if str(at_types) in view.metadata.contains] + return [view for view in self.views if at_types in view.metadata.contains] def get_views_contain(self, at_types: Union[ThingTypesBase, str, List[Union[str, ThingTypesBase]]]) -> List[View]: """ @@ -340,11 +344,11 @@ def get_view_contains(self, at_types: Union[ThingTypesBase, str, List[Union[str, # will return the *latest* view # works as of python 3.6+ (checked by setup.py) because dicts are deterministically ordered by insertion order for view in reversed(self.views): - if isinstance(at_types, str) or isinstance(at_types, ThingTypesBase): - if str(at_types) in view.metadata.contains: + if isinstance(at_types, list): + if all(map(lambda x: x in view.metadata.contains, at_types)): return view else: - if all(map(lambda x: str(x) in view.metadata.contains, at_types)): + if at_types in view.metadata.contains: return view return None diff --git a/mmif/serialize/view.py b/mmif/serialize/view.py index bb854f72..9bbe4ff3 100644 --- a/mmif/serialize/view.py +++ b/mmif/serialize/view.py @@ -128,8 +128,8 @@ def prop_check(k, v, *props): return any(k in prop and prop[k] == v for prop in props) for annotation in self.annotations: - at_type_metadata = self.metadata.contains.get(str(annotation.at_type), {}) - if not at_type or (at_type and str(annotation.at_type) == str(at_type)): + at_type_metadata = self.metadata.contains.get(annotation.at_type, {}) + if not at_type or (at_type and annotation.at_type == at_type): if all(map(lambda kv: prop_check(kv[0], kv[1], annotation.properties, at_type_metadata), properties.items())): yield annotation @@ -196,24 +196,6 @@ def __init__(self, viewmetadata_obj: Union[bytes, str, dict] = None) -> None: # see MmifObject::_required_attributes in model.py super().__init__(viewmetadata_obj) - def _find_match_hotfix(self, key: str) -> bool: - """ - Checks the existing types in the contains dict to see if - the type passed in as ``key`` has the same shortname. - - FIXME: this will produce undesired results if there is a - shortname conflict in the view. - - :param key: the type (shortname or IRI) to check - :return: whether ``key`` already has a match in the ``contains`` dict - """ - exists = False - for existing_type in self.contains.keys(): - if key.split('/')[-1] == existing_type.split('/')[-1]: - exists = True - break - return exists - def new_contain(self, at_type: Union[str, ThingTypesBase], contain_dict: dict = None) -> Optional['Contain']: """ Adds a new element to the ``contains`` dictionary. @@ -222,16 +204,12 @@ def new_contain(self, at_type: Union[str, ThingTypesBase], contain_dict: dict = :param contain_dict: any metadata associated with the annotation type :return: the generated :class:`Contain` object """ - if isinstance(at_type, ThingTypesBase): - exists = self._find_match_hotfix(at_type.name) or self._find_match_hotfix(at_type.value) - final_key = at_type.value - else: - exists = self._find_match_hotfix(at_type) - final_key = at_type - - if not exists: + if isinstance(at_type, str): + at_type = ThingTypesBase.from_str(at_type) + + if at_type not in self.contains: new_contain = Contain(contain_dict) - self.contains[final_key] = new_contain + self.contains[at_type] = new_contain return new_contain def add_parameters(self, param_dict: dict = None, **param_kwargs): @@ -331,11 +309,21 @@ def append(self, value: Union[Annotation, Document], overwrite=False) -> None: class ContainsDict(FreezableDataDict[Contain]): - _items: Dict[str, Contain] + _items: Dict[ThingTypesBase, Contain] def _deserialize(self, input_dict: dict) -> None: self._items = {key: Contain(value) for key, value in input_dict.items()} def update(self, other: Union[dict, 'ContainsDict'], overwrite=False): for k, v in other.items(): + if isinstance(k, str): + k = ThingTypesBase.from_str(k) self._append_with_key(k, v, overwrite=overwrite) + + def get(self, key: Union[str, ThingTypesBase], default=None): + if isinstance(key, str): + key = ThingTypesBase.from_str(key) + return self._items.get(key, default) + + def __contains__(self, item): + return item in list(self._items.keys()) diff --git a/setup.py b/setup.py index 031fb20a..cb666629 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ name = "mmif-python" version_fname = "VERSION" +vocabulary_templates_path = 'templates/python/vocabulary' cmdclass = {} # Used to have `import mmif` that imported `mmif` directory as a sibling, not `mmif` site-package, @@ -44,32 +45,40 @@ def generate_subpack(parpack_name, subpack_name, init_contents=""): return subpack_dir -def generate_vocab_enum(spec_version, clams_types, source_path) -> str: +def generate_vocab_enum(spec_version, clams_types, mod_name) -> str: vocab_url = 'http://mmif.clams.ai/%s/vocabulary' % spec_version + + template_file = os.path.join(vocabulary_templates_path, mod_name + '.txt') + if mod_name.startswith('annotation'): + base_class_name = 'AnnotationTypesBase' + elif mod_name.startswith('document'): + base_class_name = 'DocumentTypesBase' + else: + base_class_name = 'ClamsTypesBase' file_out = io.StringIO() - with open(source_path, 'r') as file_in: + with open(template_file, 'r') as file_in: for line in file_in.readlines(): file_out.write(line.replace('', spec_version)) for type_name in clams_types: - file_out.write(f" {type_name} = '{vocab_url}/{type_name}'\n") + file_out.write(f" {type_name} = {base_class_name}('{vocab_url}/{type_name}')\n") string_out = file_out.getvalue() file_out.close() return string_out -def generate_vocabulary(spec_version, clams_types, source_path): +def generate_vocabulary(spec_version, clams_types): """ :param spec_version: :param clams_types: the tree - :param source_path: the directory of source txt files + :param template_path: the directory of source txt files :return: """ types = { - 'thing_types': ['ThingTypesBase', 'ThingType'], - 'annotation_types': ['AnnotationTypesBase', 'AnnotationTypes'], - 'document_types': ['DocumentTypesBase', 'DocumentTypes'] + 'base_types': ['ThingTypesBase', 'ThingType', 'ClamsTypesBase', 'AnnotationTypesBase', 'DocumentTypesBase'], + 'annotation_types': ['AnnotationTypes'], + 'document_types': ['DocumentTypes'] } vocabulary_dir = generate_subpack( mmif_name, mmif_vocabulary_pkg, @@ -88,11 +97,11 @@ def generate_vocabulary(spec_version, clams_types, source_path): 'annotation_types': [t for t in clams_types if 'Document' not in t and t != 'Thing'], # extract thing type - 'thing_types': clams_types[:1] + 'base_types': clams_types[:1] } for mod_name, type_list in type_lists.items(): - enum_contents = generate_vocab_enum(spec_version, type_list, os.path.join(source_path, mod_name+'.txt')) + enum_contents = generate_vocab_enum(spec_version, type_list, mod_name) write_res_file(vocabulary_dir, mod_name+'.py', enum_contents) return vocabulary_dir @@ -151,7 +160,7 @@ def mod_run(self): import yaml yaml_file = io.BytesIO(get_spec_file_at_tag(gittag, mmif_vocab_res_oriname)) clams_types = [t['name'] for t in list(yaml.safe_load_all(yaml_file.read()))] - generate_vocabulary(spec_version, clams_types, 'vocabulary_files') + generate_vocabulary(spec_version, clams_types) ori_run(self) diff --git a/templates/python/vocabulary/annotation_types.txt b/templates/python/vocabulary/annotation_types.txt new file mode 100644 index 00000000..ed0584a5 --- /dev/null +++ b/templates/python/vocabulary/annotation_types.txt @@ -0,0 +1,10 @@ +# Spec version +# This file is auto-generated by setup.py + +from .base_types import AnnotationTypesBase + +class AnnotationTypes(AnnotationTypesBase): + """ + This class contains the CLAMS annotation types + defined in the spec version as class variables. + """ diff --git a/templates/python/vocabulary/base_types.txt b/templates/python/vocabulary/base_types.txt new file mode 100644 index 00000000..5e2f3a4e --- /dev/null +++ b/templates/python/vocabulary/base_types.txt @@ -0,0 +1,110 @@ +# This file is auto-generated by setup.py + +class TypesBase(object): + """ + Base class for arbitrary vocabulary type. + This class provides bisic initializer, comparators, + and (de-)serialization methods. + """ + + def __init__(self, type_uri: str): + if '/' in type_uri: + self.base_uri, self.shortname = type_uri.rsplit('/', 1) + else: + self.base_uri = "" + self.shortname = type_uri + + @classmethod + def from_str(cls, string: str): + if 'mmif.clams.ai' in string: + if string.endswith('Document'): + return DocumentTypesBase(string) + else: + return AnnotationTypesBase(string) + else: + return cls(string) + + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, str): + other = self.from_str(other) + return isinstance(other, TypesBase) and self.base_uri == other.base_uri and self.shortname == other.shortname + + def __repr__(self): + if len(self.base_uri) > 0: + return f'{self.base_uri}/{self.shortname}' + else: + return self.shortname + + # aliases + def __str__(self): + return self.__repr__() + + def _serialize(self): + return self.__repr__() + + +ThingTypesBase = TypesBase + + +class ClamsTypesBase(ThingTypesBase): + """ + Base class for CLAMS vocabulary types. Main + This class adds handling of MMIF specificaiton versions + in initializer and comparators. + """ + + def __init__(self, type_uri: str): + if 'mmif.clams.ai' in type_uri: + self.base_uri, self.version, _, self.shortname = type_uri.rsplit('/', 3) + else: + raise ValueError(f'{type_uri} is not a CLAMS vocabulary URI') + + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, str): + other = ThingTypesBase.from_str(other) + if isinstance(other, ClamsTypesBase): + if '.' in self.version and '.' in other.version: + # regular version + s_major, s_minor, s_patch = self.version.split('.') + o_major, o_minor, o_patch = other.version.split('.') + if s_major != o_major or s_minor != o_minor: + return False + else: + # dummy version given at development time + if self.version != other.version: + return False + return self.base_uri == other.base_uri and self.shortname == other.shortname + else: + return False + + def __repr__(self): + return f'{self.base_uri}/{self.version}/vocabulary/{self.shortname}' + + +class AnnotationTypesBase(ClamsTypesBase): + """ + Inherit from this class to build your own custom annotation + vocabularies. + """ + ... + + +class DocumentTypesBase(ClamsTypesBase): + """ + Inherit from this class to build your own custom document + vocabularies. + """ + ... + + +class ThingType(ThingTypesBase): + """ + This class contains the topmost CLAMS thing type + defined in the spec version as a class variable. + """ diff --git a/templates/python/vocabulary/document_types.txt b/templates/python/vocabulary/document_types.txt new file mode 100644 index 00000000..3eaaa163 --- /dev/null +++ b/templates/python/vocabulary/document_types.txt @@ -0,0 +1,11 @@ +# Spec version +# This file is auto-generated by setup.py + +from .base_types import DocumentTypesBase + + +class DocumentTypes(DocumentTypesBase): + """ + This class contains the CLAMS document types + defined in the spec version as class variables. + """ diff --git a/tests/test_serialize.py b/tests/test_serialize.py index dfdcb516..684e5d1e 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -253,8 +253,8 @@ def test_get_all_views_contain(self): self.assertEqual(1, len(views)) views = mmif_obj.get_views_contain([ AnnotationTypes.TimeFrame, - DocumentTypes.TextDocument.value, - AnnotationTypes.Alignment.value + DocumentTypes.TextDocument, + AnnotationTypes.Alignment, ]) self.assertEqual(1, len(views)) views = mmif_obj.get_all_views_contain('not_a_type') @@ -267,8 +267,8 @@ def test_get_view_contains(self): self.assertEqual('v8', view.id) view = mmif_obj.get_view_contains([ AnnotationTypes.TimeFrame, - DocumentTypes.TextDocument.value, - AnnotationTypes.Alignment.value + DocumentTypes.TextDocument, + AnnotationTypes.Alignment, ]) self.assertIsNotNone(view) self.assertEqual('v4', view.id) diff --git a/vocabulary_files/annotation_types.txt b/vocabulary_files/annotation_types.txt deleted file mode 100644 index be3886ca..00000000 --- a/vocabulary_files/annotation_types.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Spec version -# This file is auto-generated by setup.py - -from .thing_types import ThingTypesBase - - -class AnnotationTypesBase(ThingTypesBase): - """ - Inherit from this class to build your own custom annotation - vocabularies. Each member's name should be the name of the - type, and each member's value should be the URI of that type. - """ - ... - - -class AnnotationTypes(AnnotationTypesBase): - """ - This enum contains the URIs for the MMIF annotation types defined in the spec version . - Use this to quickly get the correct URIs for those types, or use the objects themselves in your - code and they will serialize to the strings. - """ diff --git a/vocabulary_files/document_types.txt b/vocabulary_files/document_types.txt deleted file mode 100644 index 975d6c8c..00000000 --- a/vocabulary_files/document_types.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Spec version -# This file is auto-generated by setup.py - -from .thing_types import ThingTypesBase - - -class DocumentTypesBase(ThingTypesBase): - """ - Inherit from this class to build your own custom document - vocabularies. Each member's name should be the name of the - type, and each member's value should be the URI of that type. - """ - ... - - -class DocumentTypes(DocumentTypesBase): - """ - This enum contains the URIs for the MMIF document types defined in the spec version . - Use this to quickly get the correct URIs for those types, or use the objects themselves in your - code and they will serialize to the strings. - """ diff --git a/vocabulary_files/thing_types.txt b/vocabulary_files/thing_types.txt deleted file mode 100644 index 958b97cd..00000000 --- a/vocabulary_files/thing_types.txt +++ /dev/null @@ -1,19 +0,0 @@ -# This file is auto-generated by setup.py - -from enum import Enum - - -class ThingTypesBase(Enum): - def _serialize(self): - return str(self) - - def __str__(self): - return self.value - - -class ThingType(ThingTypesBase): - """ - This enum contains the URI for the MMIF Thing types defined in the spec version . - Use this to quickly get the correct URI for this type, or use the object itself in your - code and it will serialize to the string. - """