diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
index e3a567f3a..1eb62b3c9 100644
--- a/hed/errors/schema_error_messages.py
+++ b/hed/errors/schema_error_messages.py
@@ -94,9 +94,10 @@ def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor):
@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character):
+    # Format the message for an allowedCharacter value that is neither a single character
+    # nor one of the named character types.
+    from hed.schema.hed_schema_constants import character_types
+    # Iterating the mapping yields its keys, i.e. the recognised character type names.
+    type_names = ", ".join(character_types)
return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. "
f"Allowed characters are: a single character, "
- f"or one of the following - letters, blank, digits, alphanumeric.")
+            f"or one of the following - {type_names}.")
@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
index 4abb79ede..4194bfe38 100644
--- a/hed/schema/hed_schema_constants.py
+++ b/hed/schema/hed_schema_constants.py
@@ -87,3 +87,11 @@ class HedKey:
NO_LOC_ATTRIB,
UNMERGED_ATTRIBUTE
}
+
+# Named character groups recognised as allowedCharacter values.
+# Insertion order (letters, blank, digits, alphanumeric, nonascii) is the order the names
+# appear in when the keys are joined into an error message.
+character_types = {}
+character_types["letters"] = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+character_types["blank"] = {" "}
+character_types["digits"] = set("0123456789")
+# Union of the two groups above, so the alphabets are spelled out only once.
+character_types["alphanumeric"] = character_types["letters"] | character_types["digits"]
+# Special case for all other printable unicode characters: a marker string, not a set.
+character_types["nonascii"] = "nonascii"
diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py
index 0712e999a..ac55200a2 100644
--- a/hed/schema/schema_attribute_validators.py
+++ b/hed/schema/schema_attribute_validators.py
@@ -14,7 +14,7 @@
from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors
from hed.errors.error_reporter import ErrorHandler
from hed.schema.hed_cache import get_hed_versions
-from hed.schema.hed_schema_constants import HedKey
+from hed.schema.hed_schema_constants import HedKey, character_types
def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
@@ -141,6 +141,10 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name):
deprecated_version = tag_entry.attributes.get(attribute_name, "")
library_name = tag_entry.has_attribute(HedKey.InLibrary, return_value=True)
all_versions = get_hed_versions(library_name=library_name)
+ if not library_name:
+ library_name = ""
+ if library_name == hed_schema.library and hed_schema.version_number not in all_versions:
+ all_versions.append(hed_schema.version_number)
if deprecated_version and deprecated_version not in all_versions:
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID,
tag_entry.name,
@@ -182,7 +186,7 @@ def allowed_characters_check(hed_schema, tag_entry, attribute_name):
"""
issues = []
- allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'}
+ allowed_strings = character_types
char_string = tag_entry.attributes.get(attribute_name, "")
characters = char_string.split(",")
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index 439f5d1df..9d3083046 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -125,6 +125,7 @@ def check_invalid_chars(self):
for tag_name, desc in self.hed_schema.get_desc_iter():
issues_list += validate_schema_description(tag_name, desc)
+ # todo: Decide whether to enable the commented-out unit-name character check below.
# todo Activate this session once we have clearer rules on spaces in unit names
# for unit in self.hed_schema.units:
# for i, char in enumerate(unit):
diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py
index e654e4081..691e18ddb 100644
--- a/hed/tools/visualization/tag_word_cloud.py
+++ b/hed/tools/visualization/tag_word_cloud.py
@@ -3,9 +3,10 @@
import numpy as np
from PIL import Image
from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg
+import matplotlib.font_manager as fm
-def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=300, **kwargs):
+def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=300, font_path=None, **kwargs):
""" Takes a word dict and returns a generated word cloud object.
Parameters:
@@ -14,6 +15,8 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400
background_color (str or None): If None, transparent background.
width (int): width in pixels.
height (int): height in pixels.
+ font_path (str): a filename or font name to use. Assumed to be a full file path if it ends with .ttf or .otf.
+ Font names will use a default if a close enough match isn't found.
kwargs (kwargs): Any other parameters WordCloud accepts, overrides default values where relevant.
Returns:
@@ -41,9 +44,11 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400
kwargs.setdefault('color_func', default_color_func)
kwargs.setdefault('relative_scaling', 1)
kwargs.setdefault('max_font_size', height / 20)
- kwargs.setdefault('min_font_size', 8),
+ kwargs.setdefault('min_font_size', 8)
+ if font_path and not font_path.endswith(".ttf") and not font_path.endswith(".otf"):
+ font_path = fm.findfont(font_path)
- wc = WordCloud(background_color=background_color, mask=mask_image,
+ wc = WordCloud(font_path=font_path, background_color=background_color, mask=mask_image,
width=width, height=height, mode="RGBA", **kwargs)
wc.generate_from_frequencies(word_dict)
diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py
index 92c2a2a04..b1351fc3a 100644
--- a/hed/validator/hed_validator.py
+++ b/hed/validator/hed_validator.py
@@ -5,11 +5,13 @@
"""
import re
+from semantic_version import Version
from hed.errors.error_types import ValidationErrors, DefinitionErrors
from hed.errors.error_reporter import ErrorHandler, check_for_any_errors
from hed.validator.def_validator import DefValidator
from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator
+from hed.schema import HedSchema
class HedValidator:
@@ -31,8 +33,16 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False):
self._def_validator = DefValidator(def_dicts, hed_schema)
self._definitions_allowed = definitions_allowed
- self._unit_validator = UnitValueValidator()
- self._char_validator = CharValidator()
+ self._validate_characters = False
+ # todo: This could still do validation on schema groups.
+ if isinstance(hed_schema, HedSchema):
+ validation_version = hed_schema.with_standard
+ if not validation_version:
+ validation_version = hed_schema.version_number
+ self._validate_characters = Version(validation_version) >= Version("8.3.0")
+
+ self._unit_validator = UnitValueValidator(modern_allowed_char_rules=self._validate_characters)
+ self._char_validator = CharValidator(modern_allowed_char_rules=self._validate_characters)
self._string_validator = StringValidator()
self._tag_validator = TagValidator()
self._group_validator = GroupValidator(hed_schema)
diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py
index d575463ec..06d3062a3 100644
--- a/hed/validator/tag_util/char_util.py
+++ b/hed/validator/tag_util/char_util.py
@@ -14,6 +14,14 @@ class CharValidator:
INVALID_STRING_CHARS = '[]{}~'
INVALID_STRING_CHARS_PLACEHOLDERS = '[]~'
+ def __init__(self, modern_allowed_char_rules=False):
+ """ Set up basic character validation for HED strings and tags.
+
+ Parameters:
+ modern_allowed_char_rules(bool): If True, use 8.3 style rules for unicode characters:
+ check_invalid_character_issues then flags non-printable characters instead of
+ every character above code point 127.
+ """
+ # Stored under a different attribute name; read by check_invalid_character_issues.
+ self._validate_characters = modern_allowed_char_rules
+
def check_invalid_character_issues(self, hed_string, allow_placeholders):
""" Report invalid characters.
@@ -33,8 +41,12 @@ def check_invalid_character_issues(self, hed_string, allow_placeholders):
if allow_placeholders:
invalid_dict = self.INVALID_STRING_CHARS_PLACEHOLDERS
for index, character in enumerate(hed_string):
- if character in invalid_dict or ord(character) > 127:
- validation_issues += self._report_invalid_character_error(hed_string, index)
+ if self._validate_characters:
+ if character in invalid_dict or not character.isprintable():
+ validation_issues += self._report_invalid_character_error(hed_string, index)
+ else:
+ if character in invalid_dict or ord(character) > 127:
+ validation_issues += self._report_invalid_character_error(hed_string, index)
return validation_issues
diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py
index 72f4f0948..9a7569f68 100644
--- a/hed/validator/tag_util/class_util.py
+++ b/hed/validator/tag_util/class_util.py
@@ -1,10 +1,12 @@
""" Utilities to support HED validation. """
import datetime
import re
+import functools
from hed.errors.error_reporter import ErrorHandler
from hed.errors.error_types import ValidationErrors
+from hed.schema.hed_schema_constants import HedKey, character_types
class UnitValueValidator:
@@ -18,13 +20,14 @@ class UnitValueValidator:
VALUE_CLASS_ALLOWED_CACHE = 20
- def __init__(self, value_validators=None):
+ def __init__(self, modern_allowed_char_rules=False, value_validators=None):
""" Validates the unit and value classes on a given tag.
Parameters:
+ modern_allowed_char_rules(bool): If True, _get_problem_indexes checks values against the
+ allowedCharacter sets of the tag's value classes; otherwise it only flags curly braces.
value_validators(dict or None): Override or add value class validators
"""
+ # NOTE(review): modern_allowed_char_rules was inserted ahead of value_validators, so a caller
+ # that passed value_validators positionally under the old signature would now bind it to
+ # modern_allowed_char_rules - confirm every caller uses keywords.
+ self._validate_characters = modern_allowed_char_rules
self._value_validators = self._get_default_value_class_validators()
if value_validators and isinstance(value_validators, dict):
self._value_validators.update(value_validators)
@@ -97,25 +100,20 @@ def check_tag_value_class_valid(self, original_tag, validate_text, report_as=Non
"""
return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset)
- # char_sets = {
- # "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
- # "blank": set(" "),
- # "digits": set("0123456789"),
- # "alphanumeric": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
- # }
- #
- # @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE)
- # def _get_allowed_characters(self, value_classes):
- # # This could be pre-computed
- # character_set = set()
- # for value_class in value_classes:
- # allowed_types = value_class.attributes.get(HedKey.AllowedCharacter, "")
- # for single_type in allowed_types.split(","):
- # if single_type in self.char_sets:
- # character_set.update(self.char_sets[single_type])
- # else:
- # character_set.add(single_type)
- # return character_set
+    def _get_allowed_characters(self, value_classes):
+        """ Return the characters permitted by the given value classes.
+
+        Parameters:
+            value_classes (iterable): Value class entries; the allowedCharacter attribute of each is read.
+
+        Returns:
+            frozenset: Every character of each named character type, any other attribute piece
+                       as-is (including the "nonascii" marker string), plus '#' and '/'.
+
+        Notes:
+            - The cache is keyed on the tuple of attribute strings, not on this method's arguments.
+              The caller passes a fresh `.values()` view on each call; assuming value_classes is a
+              plain dict, such views hash by identity, so caching on the argument never hit and
+              also kept the validator instance alive inside the cache.
+            - The result is immutable because it is shared between calls.
+        """
+        allowed_types = tuple(value_class.attributes.get(HedKey.AllowedCharacter, "")
+                              for value_class in value_classes)
+        return self._build_allowed_characters(allowed_types)
+
+    @staticmethod
+    @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE)
+    def _build_allowed_characters(allowed_types):
+        """ Expand a tuple of comma separated allowedCharacter strings into a frozenset. """
+        # This could be pre-computed
+        character_set = set()
+        for allowed_type in allowed_types:
+            for single_type in allowed_type.split(","):
+                if single_type in character_types and single_type != "nonascii":
+                    character_set.update(character_types[single_type])
+                else:
+                    # Single characters and the "nonascii" marker are stored verbatim.
+                    character_set.add(single_type)
+        # for now, just always allow these special cases(it's validated extensively elsewhere)
+        character_set.update("#/")
+        return frozenset(character_set)
def _get_problem_indexes(self, original_tag, stripped_value):
""" Return list of problem indices for error messages.
@@ -127,19 +125,24 @@ def _get_problem_indexes(self, original_tag, stripped_value):
Returns:
list: List of int locations in which error occurred.
"""
+ indexes = []
# Extra +1 for the slash
start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1
if start_index == -1:
- return []
+ return indexes
- problem_indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"]
- return problem_indexes
- # Partial implementation of allowedCharacter
- # allowed_characters = self._get_allowed_characters(original_tag.value_classes.values())
- # if allowed_characters:
- # # Only test the strippedvalue - otherwise numericClass + unitClass won't validate reasonably.
- # indexes = [index for index, char in enumerate(stripped_value) if char not in allowed_characters]
- # pass
+ if self._validate_characters:
+ allowed_characters = self._get_allowed_characters(original_tag.value_classes.values())
+
+ if allowed_characters:
+ # Only test the strippedvalue - otherwise numericClass + unitClass won't validate reasonably.
+ indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char not in allowed_characters]
+ if "nonascii" in allowed_characters:
+ # Printable non-ascii characters are allowed, so remove them from the problem list
+ indexes = [(char, index) for char, index in indexes if not (ord(char) > 127 and char.isprintable())]
+ else:
+ indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"]
+ return indexes
def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0):
""" Return any issues found if this is a value tag,
@@ -219,12 +222,14 @@ def validate_value_class_type(self, unit_or_value_portion, valid_types):
type_valid (bool): True if this is one of the valid_types validators.
"""
+ has_valid_func = False
for unit_class_type in valid_types:
valid_func = self._value_validators.get(unit_class_type)
if valid_func:
+ has_valid_func = True
if valid_func(unit_or_value_portion):
return True
- return False
+ return not has_valid_func
def is_date_time(date_time_string):
diff --git a/tests/data/schema_tests/schema_utf8.mediawiki b/tests/data/schema_tests/schema_utf8.mediawiki
new file mode 100644
index 000000000..4eb370653
--- /dev/null
+++ b/tests/data/schema_tests/schema_utf8.mediawiki
@@ -0,0 +1,168 @@
+HED version="8.3.0" unmerged="True"
+
+'''Prologue'''
+
+!# start schema
+
+'''Tag1'''
+* Café
+
+'''Ascii'''
+ * # {takesValue, valueClass=textClass}
+
+ '''NonAscii'''
+ * # {takesValue, valueClass=testUnicodeClass}
+
+!# end schema
+
+'''Unit classes''' [Unit classes and the units for the nodes.]
+* accelerationUnits {defaultUnits=m-per-s^2}
+** m-per-s^2 {SIUnit, unitSymbol, conversionFactor=1.0}
+* angleUnits {defaultUnits=radian}
+** radian {SIUnit, conversionFactor=1.0}
+** rad {SIUnit, unitSymbol, conversionFactor=1.0}
+** degree {conversionFactor=0.0174533}
+* areaUnits {defaultUnits=m^2}
+** m^2 {SIUnit, unitSymbol, conversionFactor=1.0}
+* currencyUnits {defaultUnits=$}[Units indicating the worth of something.]
+** dollar {conversionFactor=1.0}
+** $ {unitPrefix, unitSymbol, conversionFactor=1.0}
+** euro
+** point
+* electricPotentialUnits {defaultUnits=uv}
+** v {SIUnit, unitSymbol, conversionFactor=0.000001}
+** Volt {SIUnit, conversionFactor=0.000001}
+* frequencyUnits {defaultUnits=Hz}
+** hertz {SIUnit, conversionFactor=1.0}
+** Hz {SIUnit, unitSymbol, conversionFactor=1.0}
+* intensityUnits {defaultUnits=dB}
+** dB {unitSymbol, conversionFactor=1.0}[Intensity expressed as ratio to a threshold. May be used for sound intensity.]
+** candela {SIUnit}[Units used to express light intensity.]
+** cd {SIUnit, unitSymbol}[Units used to express light intensity.]
+* jerkUnits {defaultUnits=m-per-s^3}
+** m-per-s^3 {unitSymbol, conversionFactor=1.0}
+* magneticFieldUnits {defaultUnits=fT}[Units used to magnetic field intensity.]
+** tesla {SIUnit, conversionFactor=10^-15}
+** T {SIUnit, unitSymbol, conversionFactor=10^-15}
+* memorySizeUnits {defaultUnits=B}
+** byte {SIUnit, conversionFactor=1.0}
+** B {SIUnit, unitSymbol, conversionFactor=1.0}
+* physicalLengthUnits {defaultUnits=m}
+** foot {conversionFactor=0.3048}
+** inch {conversionFactor=0.0254}
+** meter {SIUnit, conversionFactor=1.0}
+** metre {SIUnit, conversionFactor=1.0}
+** m {SIUnit, unitSymbol, conversionFactor=1.0}
+** mile {conversionFactor=1609.34}
+* speedUnits {defaultUnits=m-per-s}
+** m-per-s {SIUnit, unitSymbol, conversionFactor=1.0}
+** mph {unitSymbol, conversionFactor=0.44704}
+** kph {unitSymbol, conversionFactor=0.277778}
+* temperatureUnits {defaultUnits=degree Celsius}
+** degree Celsius {SIUnit, conversionFactor=1.0}
+** oC {SIUnit, unitSymbol, conversionFactor=1.0}
+* timeUnits {defaultUnits=s}
+** second {SIUnit, conversionFactor=1.0}
+** s {SIUnit, unitSymbol, conversionFactor=1.0}
+** day {conversionFactor=86400}
+** minute {conversionFactor=60}
+** hour {conversionFactor=3600}[Should be in 24-hour format.]
+* volumeUnits {defaultUnits=m^3}
+** m^3 {SIUnit, unitSymbol, conversionFactor=1.0}
+* weightUnits {defaultUnits=g}
+** g {SIUnit, unitSymbol, conversionFactor=1.0}
+** gram {SIUnit, conversionFactor=1.0}
+** pound {conversionFactor=453.592}
+** lb {conversionFactor=453.592}
+
+
+'''Unit modifiers''' [Unit multiples and submultiples.]
+* deca {SIUnitModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.]
+* da {SIUnitSymbolModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.]
+* hecto {SIUnitModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.]
+* h {SIUnitSymbolModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.]
+* kilo {SIUnitModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.]
+* k {SIUnitSymbolModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.]
+* mega {SIUnitModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.]
+* M {SIUnitSymbolModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.]
+* giga {SIUnitModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.]
+* G {SIUnitSymbolModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.]
+* tera {SIUnitModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.]
+* T {SIUnitSymbolModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.]
+* peta {SIUnitModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.]
+* P {SIUnitSymbolModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.]
+* exa {SIUnitModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.]
+* E {SIUnitSymbolModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.]
+* zetta {SIUnitModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.]
+* Z {SIUnitSymbolModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.]
+* yotta {SIUnitModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.]
+* Y {SIUnitSymbolModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.]
+* deci {SIUnitModifier, conversionFactor=0.1}[SI unit submultiple representing 10^-1.]
+* d {SIUnitSymbolModifier, conversionFactor=0.1} [SI unit submultiple representing 10^-1.]
+* centi {SIUnitModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.]
+* c {SIUnitSymbolModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.]
+* milli {SIUnitModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.]
+* m {SIUnitSymbolModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.]
+* micro {SIUnitModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.]
+* u {SIUnitSymbolModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.]
+* nano {SIUnitModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.]
+* n {SIUnitSymbolModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.]
+* pico {SIUnitModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.]
+* p {SIUnitSymbolModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.]
+* femto {SIUnitModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.]
+* f {SIUnitSymbolModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.]
+* atto {SIUnitModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.]
+* a {SIUnitSymbolModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.]
+* zepto {SIUnitModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.]
+* z {SIUnitSymbolModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.]
+* yocto {SIUnitModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.]
+* y {SIUnitSymbolModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.]
+
+
+'''Value classes''' [Specification of the rules for the values provided by users.]
+* dateTimeClass {allowedCharacter=digits,allowedCharacter=T,allowedCharacter=-,allowedCharacter=:}[Date-times should conform to ISO8601 date-time format YYYY-MM-DDThh:mm:ss. Any variation on the full form is allowed.]
+* nameClass {allowedCharacter=letters,allowedCharacter=digits,allowedCharacter=_,allowedCharacter=-}[Value class designating values that have the characteristics of node names. The allowed characters are alphanumeric, hyphen, and underbar.]
+* numericClass {allowedCharacter=digits,allowedCharacter=E,allowedCharacter=e,allowedCharacter=+,allowedCharacter=-,allowedCharacter=.}[Value must be a valid numerical value.]
+* posixPath {allowedCharacter=digits,allowedCharacter=letters,allowedCharacter=/,allowedCharacter=:}[Posix path specification.]
+* textClass {allowedCharacter=letters, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Value class designating values that have the characteristics of text such as in descriptions.]
+* testUnicodeClass {allowedCharacter=letters, allowedCharacter=nonascii, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Test class to see if unicode is allowed]
+
+'''Schema attributes''' [Allowed attribute modifiers of other sections of the schema.]
+* allowedCharacter {valueClassProperty}[A schema attribute of value classes specifying a special character that is allowed in expressing the value of a placeholder. Normally the allowed characters are listed individually. However, the word letters designates the upper and lower case alphabetic characters and the word digits designates the digits 0-9. The word blank designates the blank character.]
+* conversionFactor {unitProperty, unitModifierProperty}[The multiplicative factor to multiply these units to convert to default units.]
+* deprecatedFrom {elementProperty}[Indicates that this element is deprecated. The value of the attribute is the latest schema version in which the element appeared in undeprecated form.]
+* defaultUnits {unitClassProperty}[A schema attribute of unit classes specifying the default units to use if the placeholder has a unit class but the substituted value has no units.]
+* extensionAllowed {boolProperty, nodeProperty, isInheritedProperty}[A schema attribute indicating that users can add unlimited levels of child nodes under this tag. This tag is propagated to child nodes with the exception of the hashtag placeholders.]
+* inLibrary {elementProperty} [Indicates this schema element came from the named library schema, not the standard schema. This attribute is added by tools when a library schema is merged into its partnered standard schema.]
+* recommended {boolProperty, nodeProperty}[A schema attribute indicating that the event-level HED string should include this tag.]
+* relatedTag {nodeProperty, isInheritedProperty}[A schema attribute suggesting HED tags that are closely related to this tag. This attribute is used by tagging tools.]
+* requireChild {boolProperty, nodeProperty}[A schema attribute indicating that one of the node elements descendants must be included when using this tag.]
+* required {boolProperty, nodeProperty}[A schema attribute indicating that every event-level HED string should include this tag.]
+* reserved {boolProperty, nodeProperty}[A schema attribute indicating that this tag has special meaning and requires special handling by tools.]
+* rooted {nodeProperty}[Indicates a top-level library schema node is identical to a node of the same name in the partnered standard schema. This attribute can only appear in nodes that have the inLibrary schema attribute.]
+* SIUnit {boolProperty, unitProperty}[A schema attribute indicating that this unit element is an SI unit and can be modified by multiple and submultiple names. Note that some units such as byte are designated as SI units although they are not part of the standard.]
+* SIUnitModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a base unit rather than a unit symbol.]
+* SIUnitSymbolModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a unit symbol rather than a base symbol.]
+* suggestedTag {nodeProperty, isInheritedProperty}[A schema attribute that indicates another tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions.]
+* tagGroup {boolProperty, nodeProperty}[A schema attribute indicating the tag can only appear inside a tag group.]
+* takesValue {boolProperty, nodeProperty}[A schema attribute indicating the tag is a hashtag placeholder that is expected to be replaced with a user-defined value.]
+* topLevelTagGroup {boolProperty, nodeProperty}[A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group. A tag group can have at most one tag with this attribute.]
+* unique {boolProperty, nodeProperty}[A schema attribute indicating that only one of this tag or its descendants can be used in the event-level HED string.]
+* unitClass {nodeProperty}[A schema attribute specifying which unit class this value tag belongs to.]
+* unitPrefix {boolProperty, unitProperty}[A schema attribute applied specifically to unit elements to designate that the unit indicator is a prefix (e.g., dollar sign in the currency units).]
+* unitSymbol {boolProperty, unitProperty}[A schema attribute indicating this tag is an abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.]
+* valueClass {nodeProperty}[A schema attribute specifying which value class this value tag belongs to.]
+
+'''Properties''' [Properties of the schema attributes themselves. These are used for schema handling and verification.]
+* boolProperty [Indicates that the schema attribute represents something that is either true or false and does not have a value. Attributes without this value are assumed to have string values.]
+* elementProperty [Indicates this schema attribute can apply to any type of element(tag term, unit class, etc).]
+* isInheritedProperty [Indicates that this attribute is inherited by child nodes. This property only applies to schema attributes for nodes.]
+* nodeProperty [Indicates this schema attribute applies to node (tag-term) elements. This was added to allow for an attribute to apply to multiple elements.]
+* unitClassProperty [Indicates that the schema attribute is meant to be applied to unit classes.]
+* unitModifierProperty [Indicates that the schema attribute is meant to be applied to unit modifier classes.]
+* unitProperty [Indicates that the schema attribute is meant to be applied to units within a unit class.]
+* valueClassProperty [Indicates that the schema attribute is meant to be applied to value classes.]
+
+'''Epilogue'''
+
+!# end hed
diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py
index 4b5f8e6f4..7773620b9 100644
--- a/tests/schema/test_schema_attribute_validators.py
+++ b/tests/schema/test_schema_attribute_validators.py
@@ -87,7 +87,7 @@ def test_deprecatedFrom(self):
self.assertFalse(schema_attribute_validators.tag_is_deprecated_check(self.hed_schema, tag_entry, attribute_name))
def test_conversionFactor(self):
- tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units['m-per-s^2']
+ tag_entry = self.hed_schema.unit_classes["accelerationUnits"].units["m-per-s^2"]
attribute_name = "conversionFactor"
self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name))
@@ -102,7 +102,7 @@ def test_conversionFactor(self):
self.assertTrue(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name))
def test_conversionFactor_modifier(self):
- tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units['tesla']
+ tag_entry = self.hed_schema.unit_classes["magneticFieldUnits"].units["tesla"]
attribute_name = "conversionFactor"
self.assertFalse(schema_attribute_validators.conversion_factor(self.hed_schema, tag_entry, attribute_name))
@@ -119,7 +119,7 @@ def test_conversionFactor_modifier(self):
def test_allowed_characters_check(self):
tag_entry = self.hed_schema.value_classes["dateTimeClass"]
attribute_name = "allowedCharacter"
- valid_attributes = {'letters', 'blank', 'digits', 'alphanumeric', ":", "$", "a"}
+ valid_attributes = {"letters", "blank", "digits", "alphanumeric", ":", "$", "a"}
self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name))
tag_entry = copy.deepcopy(tag_entry)
@@ -127,7 +127,7 @@ def test_allowed_characters_check(self):
tag_entry.attributes[attribute_name] = attribute
self.assertFalse(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name))
- invalid_attributes = {'lettersdd', 'notaword', ":a"}
+ invalid_attributes = {"lettersdd", "notaword", ":a"}
for attribute in invalid_attributes:
tag_entry.attributes[attribute_name] = attribute
self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name))
diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py
index bb25e1459..1516f3b93 100644
--- a/tests/tools/visualization/test_tag_word_cloud.py
+++ b/tests/tools/visualization/test_tag_word_cloud.py
@@ -2,6 +2,7 @@
import wordcloud
from hed.tools.visualization import tag_word_cloud
from hed.tools.visualization.tag_word_cloud import load_and_resize_mask
+import matplotlib.font_manager as fm
import numpy as np
from PIL import Image, ImageDraw
@@ -24,6 +25,32 @@ def test_create_wordcloud(self):
self.assertEqual(wc.width, width)
self.assertEqual(wc.height, height)
+    def test_create_wordcloud_font(self):
+        # A font given by name (no .ttf/.otf suffix) still yields a cloud whose font_path mentions it.
+        requested_width, requested_height = 400, 200
+        frequencies = {'tag1': 5, 'tag2': 3, 'tag3': 7}
+        cloud = tag_word_cloud.create_wordcloud(frequencies, width=requested_width,
+                                                height=requested_height, font_path="Serif")
+
+        self.assertIsInstance(cloud, wordcloud.WordCloud)
+        self.assertEqual(cloud.width, requested_width)
+        self.assertEqual(cloud.height, requested_height)
+        self.assertIn("Serif", cloud.font_path)
+
+    def test_create_wordcloud_font_direct(self):
+        # A font file path ending in .ttf/.otf should reach the word cloud unchanged.
+        word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7}
+        width = 400
+        height = 200
+
+        # findSystemFonts can return an empty list, and its default search also matches .ttc files.
+        # create_wordcloud only treats paths ending in .ttf/.otf as files, so choose among those.
+        font_files = [font for font in fm.findSystemFonts() if font.endswith((".ttf", ".otf"))]
+        if not font_files:
+            self.skipTest("No .ttf or .otf system font file is available")
+        first_font = font_files[0]
+
+        wc = tag_word_cloud.create_wordcloud(word_dict, width=width, height=height, font_path=first_font)
+
+        self.assertIsInstance(wc, wordcloud.WordCloud)
+        self.assertEqual(wc.width, width)
+        self.assertEqual(wc.height, height)
+        self.assertIn(first_font, wc.font_path)
+
def test_create_wordcloud_default_params(self):
word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7}
wc = tag_word_cloud.create_wordcloud(word_dict)
diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py
index 1505c28e7..edbce2e26 100644
--- a/tests/validator/test_tag_validator.py
+++ b/tests/validator/test_tag_validator.py
@@ -949,5 +949,33 @@ def test_special_units(self):
self.validator_semantic(test_strings, expected_results, expected_issues, True)
+class TestHedAllowedCharacters(TestHed):
+ # Exercises allowedCharacter handling against the schema_utf8 test schema, whose Ascii tag takes
+ # a textClass value and whose NonAscii tag takes a testUnicodeClass value.
+ compute_forms = True
+ schema_file = '../data/schema_tests/schema_utf8.mediawiki'
+
+ @staticmethod
+ def string_obj_func(validator):
+ # Returns the validator's per-tag validation entry point wrapped in a partial with no bound arguments.
+ return partial(validator._validate_individual_tags_in_hed_string)
+
+ def test_special_units(self):
+ # NOTE(review): the name matches the unit test above; the cases here are about allowed characters.
+ test_strings = {
+ 'ascii': 'Ascii/bad-date',
+ 'badascii': 'Ascii/bad-daté',
+ 'nonascii': 'Nonascii/Café',
+ }
+ expected_results = {
+ 'ascii': True,
+ 'badascii': False,
+ 'nonascii': True
+ }
+
+ # The accented character in 'Ascii/bad-daté' sits at index 13 of the tag.
+ expected_issues = {
+ 'ascii': [],
+ 'badascii': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, tag=0,
+ index_in_tag=13, index_in_tag_end=14),
+ 'nonascii': []
+ }
+ self.validator_semantic(test_strings, expected_results, expected_issues, True)
+
if __name__ == '__main__':
unittest.main()