Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions mmif/serialize/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,36 @@ class Annotation(FreezableMmifObject):
"""

def __init__(self, anno_obj: Union[bytes, str, dict] = None) -> None:
self._type: Union[str, ThingTypesBase] = ''
self._type: ThingTypesBase = ThingTypesBase('')
if not hasattr(self, 'properties'): # don't overwrite DocumentProperties on super() call
self.properties: AnnotationProperties = AnnotationProperties()
self._attribute_classes = pmap({'properties': AnnotationProperties})
self.disallow_additional_properties()
self._required_attributes = pvector(["_type", "properties"])
super().__init__(anno_obj)

def is_type(self, type: Union[str, ThingTypesBase]) -> bool:

def _deserialize(self, input_dict: dict) -> None:
self.at_type = input_dict.pop('_type')
super()._deserialize(input_dict)

def is_type(self, at_type: Union[str, ThingTypesBase]) -> bool:
"""
Check if the @type of this object matches.
"""
return str(self.at_type) == str(type)
return self.at_type == at_type

@property
def at_type(self) -> Union[str, ThingTypesBase]:
def at_type(self) -> ThingTypesBase:
# TODO (krim @ 8/19/20): should we always return string? leaving this to return
# different types can be confusing for sdk users.
return self._type

@at_type.setter
def at_type(self, at_type: Union[str, ThingTypesBase]) -> None:
self._type = at_type
if isinstance(at_type, str):
self._type = ThingTypesBase.from_str(at_type)
else:
self._type = at_type

@property
def id(self) -> str:
Expand Down Expand Up @@ -85,7 +92,7 @@ def add_property(self, name: str,
f"(\"{name}\": \"{str(value)}\"")

def is_document(self):
return self.at_type.endswith("Document")
return isinstance(self.at_type, DocumentTypesBase)


class Document(Annotation):
Expand Down
20 changes: 12 additions & 8 deletions mmif/serialize/mmif.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s
:return: a dict that keyed by view IDs (str) and has lists of alignment Annotation objects as values.
"""
v_and_a = {}
# at_type1 = ThingTypesBase.from_str(at_type1) if isinstance(at_type1, str) else at_type1
# at_type2 = ThingTypesBase.from_str(at_type2) if isinstance(at_type2, str) else at_type2
for alignment_view in self.get_all_views_contain(AnnotationTypes.Alignment):
alignments = []
# TODO (krim @ 11/7/20): maybe Alignment can have metadata on what types are aligned?
Expand All @@ -274,10 +276,12 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s
ann_id = cast(str, ann_id)
if ':' in ann_id:
view_id, ann_id = ann_id.split(':')
aligned_types.add(str(cast(Annotation, self[view_id][ann_id]).at_type))
aligned_type = cast(Annotation, self[view_id][ann_id]).at_type
else:
aligned_types.add(str(cast(Annotation, alignment_view[ann_id]).at_type))
if str(at_type1) in aligned_types and str(at_type2) in aligned_types:
aligned_type = cast(Annotation, alignment_view[ann_id]).at_type
aligned_types.add(aligned_type)
aligned_types = list(aligned_types) # because membership check for sets also checks hash() values
if at_type1 in aligned_types and at_type2 in aligned_types:
alignments.append(alignment)
if len(alignments) > 0:
v_and_a[alignment_view.id] = alignments
Expand Down Expand Up @@ -319,9 +323,9 @@ def get_all_views_contain(self, at_types: Union[ThingTypesBase, str, List[Union[
"""
if isinstance(at_types, list):
return [view for view in self.views
if all(map(lambda x: str(x) in view.metadata.contains, at_types))]
if all(map(lambda x: x in view.metadata.contains, at_types))]
else:
return [view for view in self.views if str(at_types) in view.metadata.contains]
return [view for view in self.views if at_types in view.metadata.contains]

def get_views_contain(self, at_types: Union[ThingTypesBase, str, List[Union[str, ThingTypesBase]]]) -> List[View]:
"""
Expand All @@ -340,11 +344,11 @@ def get_view_contains(self, at_types: Union[ThingTypesBase, str, List[Union[str,
# will return the *latest* view
# works as of python 3.6+ (checked by setup.py) because dicts are deterministically ordered by insertion order
for view in reversed(self.views):
if isinstance(at_types, str) or isinstance(at_types, ThingTypesBase):
if str(at_types) in view.metadata.contains:
if isinstance(at_types, list):
if all(map(lambda x: x in view.metadata.contains, at_types)):
return view
else:
if all(map(lambda x: str(x) in view.metadata.contains, at_types)):
if at_types in view.metadata.contains:
return view
return None

Expand Down
48 changes: 18 additions & 30 deletions mmif/serialize/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ def prop_check(k, v, *props):
return any(k in prop and prop[k] == v for prop in props)

for annotation in self.annotations:
at_type_metadata = self.metadata.contains.get(str(annotation.at_type), {})
if not at_type or (at_type and str(annotation.at_type) == str(at_type)):
at_type_metadata = self.metadata.contains.get(annotation.at_type, {})
if not at_type or (at_type and annotation.at_type == at_type):
if all(map(lambda kv: prop_check(kv[0], kv[1], annotation.properties, at_type_metadata), properties.items())):
yield annotation

Expand Down Expand Up @@ -196,24 +196,6 @@ def __init__(self, viewmetadata_obj: Union[bytes, str, dict] = None) -> None:
# see MmifObject::_required_attributes in model.py
super().__init__(viewmetadata_obj)

def _find_match_hotfix(self, key: str) -> bool:
"""
Checks the existing types in the contains dict to see if
the type passed in as ``key`` has the same shortname.

FIXME: this will produce undesired results if there is a
shortname conflict in the view.

:param key: the type (shortname or IRI) to check
:return: whether ``key`` already has a match in the ``contains`` dict
"""
exists = False
for existing_type in self.contains.keys():
if key.split('/')[-1] == existing_type.split('/')[-1]:
exists = True
break
return exists

def new_contain(self, at_type: Union[str, ThingTypesBase], contain_dict: dict = None) -> Optional['Contain']:
"""
Adds a new element to the ``contains`` dictionary.
Expand All @@ -222,16 +204,12 @@ def new_contain(self, at_type: Union[str, ThingTypesBase], contain_dict: dict =
:param contain_dict: any metadata associated with the annotation type
:return: the generated :class:`Contain` object
"""
if isinstance(at_type, ThingTypesBase):
exists = self._find_match_hotfix(at_type.name) or self._find_match_hotfix(at_type.value)
final_key = at_type.value
else:
exists = self._find_match_hotfix(at_type)
final_key = at_type

if not exists:
if isinstance(at_type, str):
at_type = ThingTypesBase.from_str(at_type)

if at_type not in self.contains:
new_contain = Contain(contain_dict)
self.contains[final_key] = new_contain
self.contains[at_type] = new_contain
return new_contain

def add_parameters(self, param_dict: dict = None, **param_kwargs):
Expand Down Expand Up @@ -331,11 +309,21 @@ def append(self, value: Union[Annotation, Document], overwrite=False) -> None:


class ContainsDict(FreezableDataDict[Contain]):
_items: Dict[str, Contain]
_items: Dict[ThingTypesBase, Contain]

def _deserialize(self, input_dict: dict) -> None:
self._items = {key: Contain(value) for key, value in input_dict.items()}

def update(self, other: Union[dict, 'ContainsDict'], overwrite=False):
for k, v in other.items():
if isinstance(k, str):
k = ThingTypesBase.from_str(k)
self._append_with_key(k, v, overwrite=overwrite)

def get(self, key: Union[str, ThingTypesBase], default=None):
if isinstance(key, str):
key = ThingTypesBase.from_str(key)
return self._items.get(key, default)

def __contains__(self, item):
return item in list(self._items.keys())
31 changes: 20 additions & 11 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

name = "mmif-python"
version_fname = "VERSION"
vocabulary_templates_path = 'templates/python/vocabulary'
cmdclass = {}

# Used to have `import mmif` that imported `mmif` directory as a sibling, not `mmif` site-package,
Expand Down Expand Up @@ -44,32 +45,40 @@ def generate_subpack(parpack_name, subpack_name, init_contents=""):
return subpack_dir


def generate_vocab_enum(spec_version, clams_types, source_path) -> str:
def generate_vocab_enum(spec_version, clams_types, mod_name) -> str:
vocab_url = 'http://mmif.clams.ai/%s/vocabulary' % spec_version

template_file = os.path.join(vocabulary_templates_path, mod_name + '.txt')
if mod_name.startswith('annotation'):
base_class_name = 'AnnotationTypesBase'
elif mod_name.startswith('document'):
base_class_name = 'DocumentTypesBase'
else:
base_class_name = 'ClamsTypesBase'

file_out = io.StringIO()
with open(source_path, 'r') as file_in:
with open(template_file, 'r') as file_in:
for line in file_in.readlines():
file_out.write(line.replace('<VERSION>', spec_version))
for type_name in clams_types:
file_out.write(f" {type_name} = '{vocab_url}/{type_name}'\n")
file_out.write(f" {type_name} = {base_class_name}('{vocab_url}/{type_name}')\n")

string_out = file_out.getvalue()
file_out.close()
return string_out


def generate_vocabulary(spec_version, clams_types, source_path):
def generate_vocabulary(spec_version, clams_types):
"""
:param spec_version:
:param clams_types: the tree
:param source_path: the directory of source txt files
:param template_path: the directory of source txt files
:return:
"""
types = {
'thing_types': ['ThingTypesBase', 'ThingType'],
'annotation_types': ['AnnotationTypesBase', 'AnnotationTypes'],
'document_types': ['DocumentTypesBase', 'DocumentTypes']
'base_types': ['ThingTypesBase', 'ThingType', 'ClamsTypesBase', 'AnnotationTypesBase', 'DocumentTypesBase'],
'annotation_types': ['AnnotationTypes'],
'document_types': ['DocumentTypes']
}
vocabulary_dir = generate_subpack(
mmif_name, mmif_vocabulary_pkg,
Expand All @@ -88,11 +97,11 @@ def generate_vocabulary(spec_version, clams_types, source_path):
'annotation_types': [t for t in clams_types if 'Document' not in t and t != 'Thing'],

# extract thing type
'thing_types': clams_types[:1]
'base_types': clams_types[:1]
}

for mod_name, type_list in type_lists.items():
enum_contents = generate_vocab_enum(spec_version, type_list, os.path.join(source_path, mod_name+'.txt'))
enum_contents = generate_vocab_enum(spec_version, type_list, mod_name)
write_res_file(vocabulary_dir, mod_name+'.py', enum_contents)

return vocabulary_dir
Expand Down Expand Up @@ -151,7 +160,7 @@ def mod_run(self):
import yaml
yaml_file = io.BytesIO(get_spec_file_at_tag(gittag, mmif_vocab_res_oriname))
clams_types = [t['name'] for t in list(yaml.safe_load_all(yaml_file.read()))]
generate_vocabulary(spec_version, clams_types, 'vocabulary_files')
generate_vocabulary(spec_version, clams_types)

ori_run(self)

Expand Down
10 changes: 10 additions & 0 deletions templates/python/vocabulary/annotation_types.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Spec version <VERSION>
# This file is auto-generated by setup.py

from .base_types import AnnotationTypesBase

class AnnotationTypes(AnnotationTypesBase):
    """
    This class contains the CLAMS annotation types
    defined in the spec version <VERSION> as class variables.
    """
    # NOTE: this is a build-time template; setup.py appends one class
    # attribute per vocabulary type below (indented into this class body),
    # so the template must end with this class body still open.
110 changes: 110 additions & 0 deletions templates/python/vocabulary/base_types.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# This file is auto-generated by setup.py

class TypesBase(object):
    """
    Base class for arbitrary vocabulary types.

    Provides the common initializer, equality and hashing behavior,
    and (de-)serialization helpers shared by all vocabulary types.
    A type is identified by a URI split into ``base_uri`` and
    ``shortname`` at the last slash.
    """

    def __init__(self, type_uri: str):
        # rpartition yields ('', '', uri) when there is no slash, so a bare
        # name ends up with an empty base_uri, matching the URI-less case.
        self.base_uri, _, self.shortname = type_uri.rpartition('/')

    @classmethod
    def from_str(cls, string: str):
        """
        Builds a type object from a URI string, dispatching CLAMS-hosted
        URIs to the matching CLAMS subclass and everything else to ``cls``.
        """
        if 'mmif.clams.ai' not in string:
            return cls(string)
        clams_cls = DocumentTypesBase if string.endswith('Document') else AnnotationTypesBase
        return clams_cls(string)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # strings are promoted to type objects before comparison
        if isinstance(other, str):
            other = self.from_str(other)
        if not isinstance(other, TypesBase):
            return False
        return (self.base_uri, self.shortname) == (other.base_uri, other.shortname)

    def __repr__(self):
        return f'{self.base_uri}/{self.shortname}' if self.base_uri else self.shortname

    # aliases
    def __str__(self):
        return self.__repr__()

    def _serialize(self):
        return self.__repr__()


# Alias: the "Thing" hierarchy roots directly at the generic base class.
ThingTypesBase = TypesBase


class ClamsTypesBase(ThingTypesBase):
    """
    Base class for CLAMS vocabulary types.
    This class adds handling of MMIF specification versions
    in the initializer and comparators.
    """

    def __init__(self, type_uri: str):
        """
        Splits a CLAMS vocabulary URI of the shape
        ``{base_uri}/{version}/vocabulary/{shortname}`` into its parts.

        :param type_uri: a full CLAMS vocabulary URI
        :raises ValueError: when the URI is not hosted under ``mmif.clams.ai``
        """
        if 'mmif.clams.ai' in type_uri:
            # the literal "vocabulary" path segment is discarded
            self.base_uri, self.version, _, self.shortname = type_uri.rsplit('/', 3)
        else:
            raise ValueError(f'{type_uri} is not a CLAMS vocabulary URI')

    def __hash__(self):
        # __eq__ ignores patch-level (and, for regular versions, only compares
        # major.minor) differences in the version, so hashing the full URI
        # string (as the superclass does) would give equal objects different
        # hashes and break set/dict membership checks. Hash only the
        # version-independent parts to keep the hash/eq contract.
        return hash((self.base_uri, self.shortname))

    def __eq__(self, other):
        # plain strings are promoted to type objects before comparison
        if isinstance(other, str):
            other = ThingTypesBase.from_str(other)
        if isinstance(other, ClamsTypesBase):
            if '.' in self.version and '.' in other.version:
                # regular x.y.z versions: equal when major and minor match
                # (patch-level differences are deliberately ignored)
                s_major, s_minor, _ = self.version.split('.')
                o_major, o_minor, _ = other.version.split('.')
                if s_major != o_major or s_minor != o_minor:
                    return False
            else:
                # dummy version string used at development time must match exactly
                if self.version != other.version:
                    return False
            return self.base_uri == other.base_uri and self.shortname == other.shortname
        else:
            return False

    def __repr__(self):
        return f'{self.base_uri}/{self.version}/vocabulary/{self.shortname}'


class AnnotationTypesBase(ClamsTypesBase):
    """
    Base class for annotation-type vocabularies; subclass this to
    define your own custom annotation vocabulary.
    """
    pass


class DocumentTypesBase(ClamsTypesBase):
    """
    Base class for document-type vocabularies; subclass this to
    define your own custom document vocabulary.
    """
    pass


class ThingType(ThingTypesBase):
    """
    This class contains the topmost CLAMS thing type
    defined in the spec version <VERSION> as a class variable.
    """
    # NOTE: the ``<VERSION>`` placeholder above and the ``Thing`` class
    # attribute are filled in / appended by setup.py at build time, so this
    # template must end with this class body still open.
11 changes: 11 additions & 0 deletions templates/python/vocabulary/document_types.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Spec version <VERSION>
# This file is auto-generated by setup.py

from .base_types import DocumentTypesBase


class DocumentTypes(DocumentTypesBase):
    """
    This class contains the CLAMS document types
    defined in the spec version <VERSION> as class variables.
    """
    # NOTE: this is a build-time template; setup.py appends one class
    # attribute per vocabulary type below (indented into this class body),
    # so the template must end with this class body still open.
Loading