From e776bcbbc8fb7573b092863f358c5de7a44b5887 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:17:12 -0500 Subject: [PATCH 01/10] Wrote a tag_summary --- hed/tools/analysis/tag_summary_util.py | 94 ++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 hed/tools/analysis/tag_summary_util.py diff --git a/hed/tools/analysis/tag_summary_util.py b/hed/tools/analysis/tag_summary_util.py new file mode 100644 index 000000000..161f41c0b --- /dev/null +++ b/hed/tools/analysis/tag_summary_util.py @@ -0,0 +1,94 @@ + +from hed.tools.analysis.event_manager import EventManager +from hed.models.tabular_input import TabularInput +from hed.tools.analysis.hed_tag_manager import HedTagManager + +# Excluding tags for condition-variables and task -- these can be done separately if we want to. +REMOVE_TYPES = ['Condition-variable', 'Task'] + +# Tags organized by whether they are found with either of these +# MATCH_TYPES = ['Sensory-event', 'Agent-action'] +MATCH_TYPES = ['Experimental-stimulus', 'Participant-response', 'Incidental', 'Instructional', 'Mishap', + 'Task-activity', 'Warning'] +# If a tag has any of these as a parent, it is excluded +EXCLUDED_PARENTS = {'data-marker', 'data-resolution', 'quantitative-value', 'spatiotemporal-value', + 'statistical-value', 'informational-property', 'organizational-property', + 'grayscale', 'hsv-color', 'rgb-color', 'luminance', 'luminance-contrast', 'opacity', + 'task-effect-evidence', 'task-relationship', 'relation'} + +# If a tag has any of these as a parent, it is replaced by this parent only +CUTOFF_TAGS = {'blue-color', 'brown-color', 'cyan-color', 'gray-color', 'green-color', 'orange-color', + 'pink-color', 'purple-color', 'red-color', 'white-color', 'yellow-color', + 'visual-presentation'} + +# These tags are removed at the end as non-informational +FILTERED_TAGS = {'event', 'agent', 'action', 'move-body-part', 'item', 'biological-item', 
'anatomical-item', 'body-part', + 'lower-extremity-part', 'upper-extremity-part', 'head-part', 'torso-part', 'face-part', + 'language-item', 'object', 'geometric-object', + 'man-made-object', 'device', 'computing-device', 'io-device', 'input-device', 'output-device', + 'auditory-device', 'display-device', + 'recording-device', 'natural-object', 'document', 'media', 'media-clip', 'visualization', + 'property', 'agent-property', 'agent-state', + 'agent-cognitive-state', 'agent-emotional-state', 'agent-physiological-state', 'agent-postural-state', + 'agent-task-role', 'agent-trait', + 'data-property', 'biological-artifact', 'nonbiological-artifact', + 'spatial-property', 'temporal-property', 'spectral-property', 'dara-source-type', 'data-value', + 'categorical-value', 'categorical-class-value', 'categorical-judgment-value', + 'categorical-level-value', 'categorical-location-value', 'categorical-orientation-value', + 'physical-value', 'data-variability-attribute', 'environmental-property', 'sensory-property', + 'sensory-attribute', 'auditory-attribute', 'gustatory-attribute', 'olfactory-attribute', + 'tactile-attribute', 'visual-attribute', 'sensory-presentation', 'task-property', 'task-action-type', + 'task-attentional-demand', 'task-event-role', 'task-stimulus-role'} + +def extract_tag_summary(hed_schema, df, sidecar=None, name=None): + """ Extract a summary of the tags in a given tabular input file. + Parameters: + hed_schema (HedSchema): The HedSchema object to use for the summary. + df (pd.DataFrame): The DataFrame to summarize. + sidecar (str): The sidecar file to use for the summary. + name (str): The name of the summary. + + Returns: + dict: A dictionary with the summary information. 
+ """ + + group_dict = {key: set() for key in MATCH_TYPES} + other = set() + input_data = TabularInput(df, sidecar=sidecar, name=name) + event_manager = EventManager(input_data, hed_schema) + tag_man = HedTagManager(event_manager, remove_types=REMOVE_TYPES) + hed_objs = tag_man.get_hed_objs(include_context=False, replace_defs=True) + for hed in hed_objs: + if not hed: + continue + all_tags = hed.get_all_tags() + found = False + for key, tags in group_dict.items(): + if match_tags(all_tags, key): + group_dict[key] = update_tags(group_dict[key], all_tags) + found = True + break + if not found: + other = update_tags(other, all_tags) + + for key, tags in group_dict.items(): + group_dict[key] = tags - FILTERED_TAGS + other = other - FILTERED_TAGS + return group_dict, other + + +def match_tags(all_tags, key): + return any(tag.short_base_tag == key for tag in all_tags) + + +def update_tags(tag_set, all_tags): + for tag in all_tags: + terms = tag.tag_terms + if any(item in EXCLUDED_PARENTS for item in terms): + continue + match = next((item for item in terms if item in CUTOFF_TAGS), None) + if match: + tag_set.add(match) + else: + tag_set.update(tag.tag_terms) + return tag_set \ No newline at end of file From 845b03cb54114fab67701c0ca0d1b512bd372a7a Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:18:48 -0500 Subject: [PATCH 02/10] Updated the match types --- .github/workflows/ci.yaml | 2 +- hed/tools/analysis/tag_summary_util.py | 37 ++++++++++++++++++++------ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b169b8f0b..e1bcf5aa8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,7 +51,7 @@ jobs: # Install dependencies - name: Install dependencies run: | - python -m pip install --upgrade --upgrade-strategy eager pip + python -m pip install --upgrade pip pip install -r requirements.txt pip install -r 
docs/requirements.txt diff --git a/hed/tools/analysis/tag_summary_util.py b/hed/tools/analysis/tag_summary_util.py index 161f41c0b..a3c856889 100644 --- a/hed/tools/analysis/tag_summary_util.py +++ b/hed/tools/analysis/tag_summary_util.py @@ -1,4 +1,5 @@ - +import os +from hed import load_schema_version from hed.tools.analysis.event_manager import EventManager from hed.models.tabular_input import TabularInput from hed.tools.analysis.hed_tag_manager import HedTagManager @@ -7,9 +8,9 @@ REMOVE_TYPES = ['Condition-variable', 'Task'] # Tags organized by whether they are found with either of these -# MATCH_TYPES = ['Sensory-event', 'Agent-action'] MATCH_TYPES = ['Experimental-stimulus', 'Participant-response', 'Incidental', 'Instructional', 'Mishap', - 'Task-activity', 'Warning'] + 'Task-activity', 'Warning', 'Sensory-event', 'Agent-action'] + # If a tag has any of these as a parent, it is excluded EXCLUDED_PARENTS = {'data-marker', 'data-resolution', 'quantitative-value', 'spatiotemporal-value', 'statistical-value', 'informational-property', 'organizational-property', @@ -40,12 +41,12 @@ 'tactile-attribute', 'visual-attribute', 'sensory-presentation', 'task-property', 'task-action-type', 'task-attentional-demand', 'task-event-role', 'task-stimulus-role'} -def extract_tag_summary(hed_schema, df, sidecar=None, name=None): +def extract_tag_summary(hed_schema, tsv_file, sidecar_file=None, name=None): """ Extract a summary of the tags in a given tabular input file. Parameters: hed_schema (HedSchema): The HedSchema object to use for the summary. - df (pd.DataFrame): The DataFrame to summarize. - sidecar (str): The sidecar file to use for the summary. + tsv_file(str): The path of the tsv file + sidecar_file (str): The sidecar file to use for the summary. name (str): The name of the summary. 
Returns: @@ -54,7 +55,7 @@ def extract_tag_summary(hed_schema, df, sidecar=None, name=None): group_dict = {key: set() for key in MATCH_TYPES} other = set() - input_data = TabularInput(df, sidecar=sidecar, name=name) + input_data = TabularInput(tsv_file, sidecar=sidecar_file, name=name) event_manager = EventManager(input_data, hed_schema) tag_man = HedTagManager(event_manager, remove_types=REMOVE_TYPES) hed_objs = tag_man.get_hed_objs(include_context=False, replace_defs=True) @@ -91,4 +92,24 @@ def update_tags(tag_set, all_tags): tag_set.add(match) else: tag_set.update(tag.tag_terms) - return tag_set \ No newline at end of file + return tag_set + + +if __name__ == '__main__': + schema = load_schema_version('8.4.0') + root_dir = 'g:/HEDExamples/hed-examples/datasets/eeg_ds003645s_hed' + sidecar_path = os.path.join(root_dir, 'task-FacePerception_events.json') + tsv_path = os.path.join(root_dir, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv') + + tag_dict, others = extract_tag_summary(schema, tsv_path, sidecar_file=sidecar_path, name='eeg_ds003645s_hed') + + for the_key, the_item in tag_dict.items(): + if not the_item: + continue + print(f"{the_key}:") + for tag in the_item: + print(f" {tag}") + + print("Other:") + for tag in others: + print(f" {tag}") \ No newline at end of file From aa47ea69fcc0ffa3dbc8f0f19bbd6394674d0028 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:24:20 -0500 Subject: [PATCH 03/10] Updated the actions to try to fix pip error --- .github/workflows/ci.yaml | 2 +- .github/workflows/ci_cov.yaml | 2 +- .github/workflows/ci_windows.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e1bcf5aa8..d79f7178c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,7 +51,7 @@ jobs: # Install dependencies - name: Install dependencies run: | - python -m pip install --upgrade pip + 
pip install --upgrade pip pip install -r requirements.txt pip install -r docs/requirements.txt diff --git a/.github/workflows/ci_cov.yaml b/.github/workflows/ci_cov.yaml index b63ca7a70..a5ebae942 100644 --- a/.github/workflows/ci_cov.yaml +++ b/.github/workflows/ci_cov.yaml @@ -46,7 +46,7 @@ jobs: # Install dependencies - name: Install dependencies run: | - python -m pip install --upgrade --upgrade-strategy eager pip + pip install --upgrade pip pip install flake8 coverage -r requirements.txt -r docs/requirements.txt # Run flake8 diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index d02dfb2e0..d93001394 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -32,7 +32,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade --upgrade-strategy eager pip + pip install --upgrade pip pip install -r requirements.txt - name: Test with unittest From 343dfa78081e117b1c0bf761faaf2df9984efa97 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:29:19 -0500 Subject: [PATCH 04/10] Trying again to upgrade --- .github/workflows/ci.yaml | 1 + .github/workflows/ci_cov.yaml | 1 + .github/workflows/ci_windows.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d79f7178c..6985cf2bf 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,6 +51,7 @@ jobs: # Install dependencies - name: Install dependencies run: | + python -m pip uninstall pip setuptools pip install --upgrade pip pip install -r requirements.txt pip install -r docs/requirements.txt diff --git a/.github/workflows/ci_cov.yaml b/.github/workflows/ci_cov.yaml index a5ebae942..1dd05e50f 100644 --- a/.github/workflows/ci_cov.yaml +++ b/.github/workflows/ci_cov.yaml @@ -46,6 +46,7 @@ jobs: # Install dependencies - name: Install dependencies run: | + python -m pip uninstall pip setuptools pip 
install --upgrade pip pip install flake8 coverage -r requirements.txt -r docs/requirements.txt diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index d93001394..460123daf 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -32,6 +32,7 @@ jobs: - name: Install dependencies run: | + python -m pip uninstall pip setuptools pip install --upgrade pip pip install -r requirements.txt From fc3035d7b0e49c5fac8a1c28fb2da8b4e457493e Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:32:06 -0500 Subject: [PATCH 05/10] Trying not upgrading pip --- .github/workflows/ci.yaml | 2 -- .github/workflows/ci_cov.yaml | 2 -- .github/workflows/ci_windows.yaml | 2 -- 3 files changed, 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6985cf2bf..1f380cae7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,8 +51,6 @@ jobs: # Install dependencies - name: Install dependencies run: | - python -m pip uninstall pip setuptools - pip install --upgrade pip pip install -r requirements.txt pip install -r docs/requirements.txt diff --git a/.github/workflows/ci_cov.yaml b/.github/workflows/ci_cov.yaml index 1dd05e50f..d99d1acf4 100644 --- a/.github/workflows/ci_cov.yaml +++ b/.github/workflows/ci_cov.yaml @@ -46,8 +46,6 @@ jobs: # Install dependencies - name: Install dependencies run: | - python -m pip uninstall pip setuptools - pip install --upgrade pip pip install flake8 coverage -r requirements.txt -r docs/requirements.txt # Run flake8 diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index 460123daf..0ead772cc 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -32,8 +32,6 @@ jobs: - name: Install dependencies run: | - python -m pip uninstall pip setuptools - pip install --upgrade pip pip install -r requirements.txt - name: Test with 
unittest From f73c2b0a06ddf1e3ea402c9588b94d50a166adfb Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 18:03:41 -0500 Subject: [PATCH 06/10] Trying a specific version of pip --- .github/workflows/ci.yaml | 1 + .github/workflows/ci_cov.yaml | 1 + .github/workflows/ci_windows.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1f380cae7..095bbff6b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,6 +51,7 @@ jobs: # Install dependencies - name: Install dependencies run: | + python -m pip install --upgrade pip==23.3.1 pip install -r requirements.txt pip install -r docs/requirements.txt diff --git a/.github/workflows/ci_cov.yaml b/.github/workflows/ci_cov.yaml index d99d1acf4..768217d9f 100644 --- a/.github/workflows/ci_cov.yaml +++ b/.github/workflows/ci_cov.yaml @@ -46,6 +46,7 @@ jobs: # Install dependencies - name: Install dependencies run: | + python -m pip install --upgrade pip==23.3.1 pip install flake8 coverage -r requirements.txt -r docs/requirements.txt # Run flake8 diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index 0ead772cc..037a1d329 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -32,6 +32,7 @@ jobs: - name: Install dependencies run: | + python -m pip install --upgrade pip==23.3.1 pip install -r requirements.txt - name: Test with unittest From 1d70c67ef26c9e4e5562e3d1877af660674feb3e Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 30 Apr 2025 18:13:04 -0500 Subject: [PATCH 07/10] Tried not using cache --- .github/workflows/ci.yaml | 3 ++- .github/workflows/ci_cov.yaml | 3 ++- .github/workflows/ci_windows.yaml | 3 ++- hed/tools/analysis/tag_summary_util.zip | Bin 0 -> 2181 bytes 4 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 
hed/tools/analysis/tag_summary_util.zip diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 095bbff6b..e348824c6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,6 +42,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: false - uses: actions/cache@v4 with: @@ -51,7 +52,7 @@ jobs: # Install dependencies - name: Install dependencies run: | - python -m pip install --upgrade pip==23.3.1 + python -m pip install --upgrade pip pip install -r requirements.txt pip install -r docs/requirements.txt diff --git a/.github/workflows/ci_cov.yaml b/.github/workflows/ci_cov.yaml index 768217d9f..c29ab3a47 100644 --- a/.github/workflows/ci_cov.yaml +++ b/.github/workflows/ci_cov.yaml @@ -42,11 +42,12 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: false # Install dependencies - name: Install dependencies run: | - python -m pip install --upgrade pip==23.3.1 + python -m pip install --upgrade pip pip install flake8 coverage -r requirements.txt -r docs/requirements.txt # Run flake8 diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index 037a1d329..4ba32579f 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -24,6 +24,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: false - uses: actions/cache@v4 with: @@ -32,7 +33,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip==23.3.1 + python -m pip install --upgrade pip pip install -r requirements.txt - name: Test with unittest diff --git a/hed/tools/analysis/tag_summary_util.zip b/hed/tools/analysis/tag_summary_util.zip new file mode 100644 index 0000000000000000000000000000000000000000..ced22a82d07edacb349ea6013d68d99849664300 GIT binary patch literal 2181 zcma);XE+-Q7snF>HEV0dZi!NRl<38^x1efVgi@knB?ygFuMTR*s;VGX)vg_B?bXs! 
zyNF$@s1Y@a+rA&(_kMiO|9Q@M&i{NmUw&4mv~=76#$Q^FAvgm566R@kfMF|_)4Mm1(>qL)!#msFDRHWi2PX_aqg9AV zVyH?*G+*bX|0!>zX&yNsVMLzcjeV?bTH7C|_xj3mjKEUMkC(b8L~N#mGJ7?~2)BZp z?=2ebRzOx()*Gn#N}5RAEFUBHBdfVsKmBO8FB!!^P>$oOi_gXxq>&N_e>RaH246Kx zton1|zGZBMDv3K_e8>DYD#dg(q@v}`L1(B@b3dz5)8eQ?O9Sz*Js&tGf+|^m>^R~q zb*pQR?CFHG&$}fh=$*3Fl#I1u{?K@BJ~F}s@qXIEzKyA3!l24px~u zU%=MSXs=A){;pG8x32P_!4rJi8Ui-Q@z=UH-i3QKDtU1d+Z1&OT+7tab=S%V$IAoj z=4U%+!|N+5uTQT_H7z1K2J+>56Vh`FE1(OE1?TA8CNy}7XD*H;zKLX}+oUsK;g=b- zOr>6SBD1Pq?@p0PW+J|tr??~TDNoE!iQk~2H)yb)hix>fZCl^|xQ;G27PLWj>(8iI z2i}VAqd7=Zn#R`!e_%dhk&N54^=%*q%!#2}48=(tDlxvHZ?1lI%f_B-P2bgX+H3Er zn*z4^E0wQzP)9dh$ApE1cB!XYWIi?cFl;D($kOL8UIwA^LHiw(Xm2iT$!p-6GTNy` zsAm}I^+jXho}RiaA>_0EeFoV0@tWeU%a;T!>w%S@dxOek^g5eF9Y;JR%B@j%Ljib+ z?ZC!LN`m9pPrh#zQ`fmVPvAwOrI^tN+!4>ky-7pMq9D^@q2Adf^{b4|uVqF}G=uif0!_z)tk2)qG4J!(0Nxh;zo4QufQT2qOk2 zXDMS(W$>n&2vN5j`o%kRu=wdJlrl0VeIjeu9^jJIo2!E^|o*OfZX{^}a|7 z@(TUvX5D<_+Me!SbWA;{MUcysU%T`qd!o7vxQnH zEghGs^~_b*t*Nze48(q?Khybet#k%OhOkIHquUHr#H)<4Zi!T?Whb)d9enoYd6F3u z)`8vewG^~4*M2IKZB@#azx}A}Ufg5xU3EJ)X?f-3zRf`viCWn1E8YF_P)@s33`mvQ z7DSLE1hM5}zoyCOlm-EE?G%<+l8^HSZA#SB+&@%>&_XX4Ajex}f}hXBHx19!Q6V}0 zI$i2D^77pBC5t{sN@ks?BFv(eKcCC{tSkZk`U@O)|BmF70L{`C{vFTsI@;ZB-wU^I zIMJWq+RxO=<{}UaJAy$}qd3l$A}F#|EpOZgu*;81UIyC@<|e+35}Ax1D^-bVQj641 z*rtbV66h|=b$+Q?waXn|%a|m#Elkz2Y4!}C{geb!VOL~4dMG9S6!v>kEIge_KD^7-8OGlj)PJ=(PS^)XA1LZ0Ge_51 z#wokqCqnyHI|y7W>X6P(u6L(cHmWZrIaipEZ$zaRtkhgi5@&DkfFYhjV-b$xBwJ_h z0CEND{qw7DBVa{@k^60X9fk||WP627u!b<+4;}F&g$E8c1`u^7xS^l&yi^QR7xRa$ zj#({!vT>rX-!M7cFl^q_$(ph$aG_ORSljAmJj1wala$h4Cw{hPA(}r=v^ZWIQ@@o~ zQRu5kv_y5C6K=XgD#b^dwBp;3Ui`!h)6i(P->u4+%vP114KQ`AS;kvuKD#JWJ=cAF zpSL>g)jl)dBtqfE8l#vZ^T>*XhxwC>wFC?kNDn2}+P5dzc4AHn2YVJv>4X3in05oc zCRqr65dKm1*PoWF1<_r0nZ34ChIZC z4mRiSdI7dv__q0EWA?#eR+mNw9U;%cM)(n9{Uw~IYprb!!C1>oQ+w?M0?$QO4=vdS zArHFRt%h%VtzK zT+%|9ZG(IYjCTNjO>!dN6*^s$zM5UQ;9075>R!-ba7}HBf@O|-C+Fu^+;wwl8Ps|9 z$DP@^xgS|Nn3ljj;D8F;07FTLwK1kII0sspGB65H3Y^w4001()0DzS#kcOM~e_ZhI 
n9T$NAzm!n_@OS(FPyz@5{7nf~rXc#?C%|8F_*Hose^37aqwEK7 literal 0 HcmV?d00001 From c2c771c745ff474ab822c12442e261252ce4f39b Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 5 May 2025 09:35:47 -0500 Subject: [PATCH 08/10] Updated requirements, first implementation of event checker --- .github/workflows/ci_windows.yaml | 1 - docs/requirements.txt | 20 +- hed/errors/error_messages.py | 21 +- hed/errors/error_types.py | 5 + hed/tools/analysis/event_checker.py | 293 +++++++++++++++++++++ hed/tools/analysis/tag_summary_util.py | 115 -------- hed/tools/analysis/tag_summary_util.zip | Bin 2181 -> 0 bytes requirements.txt | 22 +- tests/tools/analysis/test_event_checker.py | 80 ++++++ 9 files changed, 419 insertions(+), 138 deletions(-) create mode 100644 hed/tools/analysis/event_checker.py delete mode 100644 hed/tools/analysis/tag_summary_util.py delete mode 100644 hed/tools/analysis/tag_summary_util.zip create mode 100644 tests/tools/analysis/test_event_checker.py diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index 4ba32579f..750f754c3 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -24,7 +24,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: false - uses: actions/cache@v4 with: diff --git a/docs/requirements.txt b/docs/requirements.txt index 30ba57d1e..1cf074a59 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,12 +1,12 @@ defusedxml>=0.7.1 -inflect>=6.0.5 -jsonschema>=4.17.3 -numpy>=1.21.6 -openpyxl>=3.1.0 -pandas>=1.3.5 -portalocker>=2.7.0 +inflect>=7.5.0 +jsonschema>=4.23.0 +numpy==2.0.2 +openpyxl>=3.1.5 +pandas>=2.2.3 +portalocker>=3.1.1 semantic-version>=2.10.0 -myst-parser>=1.0.0 -Sphinx>=5.2.2 -sphinx_rtd_theme>=1.0.0 -wordcloud==1.9.4 +myst-parser==3.0.1 +Sphinx==7.4.7 +sphinx_rtd_theme>=3.0.2 +wordcloud>=1.9.4 diff --git a/hed/errors/error_messages.py 
b/hed/errors/error_messages.py index b520d6eb8..8b7d69554 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -5,7 +5,7 @@ from hed.errors.error_reporter import hed_error, hed_tag_error from hed.errors.error_types import (ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors, - TemporalErrors, ColumnErrors) + TemporalErrors, ColumnErrors, TagQualityErrors) @hed_tag_error(ValidationErrors.UNITS_INVALID) @@ -462,3 +462,22 @@ def nested_column_ref(column_name, ref_column): @hed_error(ColumnErrors.MALFORMED_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) def malformed_column_ref(column_name, index, symbol): return f"Column {column_name} has a malformed column reference. Improper symbol {symbol} found at index {index}." + + +@hed_error(TagQualityErrors.MISSING_EVENT_TYPE, default_severity=ErrorSeverity.WARNING, + actual_code=TagQualityErrors.MISSING_EVENT_TYPE) +def missing_event_type(string, line): + return f"The HED string '{string}' at line {line} has no Event type." + + +@hed_error(TagQualityErrors.MISSING_TASK_ROLE, default_severity=ErrorSeverity.WARNING, + actual_code=TagQualityErrors.MISSING_TASK_ROLE) +def missing_task_role(event_type, string, line): + return f"The HED string '{string}' at line {line} with event {event_type} has no Task-event-role type tag." + + +@hed_error(TagQualityErrors.IMPROPER_TAG_GROUPING, default_severity=ErrorSeverity.WARNING, + actual_code=TagQualityErrors.IMPROPER_TAG_GROUPING) +def improper_tag_grouping(event_types, string, line): + return f"The HED string '{string}' at line {line} has multiple events [{event_types}] but is improperly " + \ + f"parenthesized so the other tags cannot be uniquely associated with an event." 
\ No newline at end of file diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index d743606b6..6157c5978 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -190,3 +190,8 @@ class ColumnErrors: SELF_COLUMN_REF = "SELF_COLUMN_REF" NESTED_COLUMN_REF = "NESTED_COLUMN_REF" MALFORMED_COLUMN_REF = "MALFORMED_COLUMN_REF" + +class TagQualityErrors: + MISSING_EVENT_TYPE = "MISSING_EVENT_TYPE" + MISSING_TASK_ROLE = "MISSING_TASK_ROLE" + IMPROPER_TAG_GROUPING = "IMPROPER_TAG_GROUPING" \ No newline at end of file diff --git a/hed/tools/analysis/event_checker.py b/hed/tools/analysis/event_checker.py new file mode 100644 index 000000000..3bc2d79b9 --- /dev/null +++ b/hed/tools/analysis/event_checker.py @@ -0,0 +1,293 @@ +import os +from hed import load_schema_version, get_printable_issue_string +from hed.tools.analysis.event_manager import EventManager +from hed.errors.error_reporter import ErrorHandler, ErrorContext +from hed.errors.error_types import TagQualityErrors +from hed.models.tabular_input import TabularInput +from hed.tools.analysis.hed_tag_manager import HedTagManager + +class EventChecker: + EVENT_TAGS = {'Event', 'Sensory-event', 'Agent-action', 'Data-feature', 'Experiment-control', + 'Experiment-structure', 'Measurement-event'} + NON_TASK_EVENTS = {'Data-feature', 'Experiment-control', 'Experiment-structure', 'Measurement-event'} + TASK_ROLES = {'Experimental-stimulus', 'Participant-response', 'Incidental', 'Instructional', 'Mishap', + 'Task-activity', 'Warning'} + ACTION_ROLES = {'Appropriate-action', 'Correct-action', 'Correction', 'Done-indication', + 'Imagined-action', 'Inappropriate-action', 'Incorrect-action', 'Indeterminate-action', 'Miss', + 'Near-miss', 'Omitted-action', 'Ready-indication'} + STIMULUS_ROLES = {'Cue', 'Distractor', 'Expected', 'Extraneous', 'Feedback', 'Go-signal', 'Meaningful', + 'Newly-learned', 'Non-informative', 'Non-target', 'Not-meaningful', 'Novel', 'Oddball', + 'Penalty', 'Planned', 'Priming', 
'Query', 'Reward', 'Stop-signal', 'Target', 'Threat', + 'Timed', 'Unexpected', 'Unplanned'} + + ALL_ROLES = TASK_ROLES.union(ACTION_ROLES).union(STIMULUS_ROLES) + + def __init__(self, hed_obj, line_number, error_handler=None): + """ Constructor for the EventChecker class. + + Parameters: + hed_obj (HedString): The HED string to check. + line_number (int or None): The index of the HED string in the file. + error_handler (ErrorHandler): The ErrorHandler object to use for error handling. + + """ + self.hed_obj = hed_obj + self.line_number = line_number + if error_handler is None: + self.error_handler = ErrorHandler() + else: + self.error_handler = error_handler + self.issues = self._verify_events(self.hed_obj) + self.group_error = any(issue['code'] == TagQualityErrors.IMPROPER_TAG_GROUPING for issue in self.issues) + + def _verify_events(self, hed_obj): + """ Verify that the events in the HED string are properly grouped. + + Parameters: + hed_obj (HedString): The HED string to verify. + + Returns: + list: list of issues + """ + if not hed_obj: + return [] + hed_groups = [hed_obj] # Initialize with the top-level HedGroup + while len(hed_groups) > 0: + issues = self._check_grouping(hed_groups) + if issues: + return issues + return [] + + def _check_grouping(self, hed_groups): + """ Check for event tagging errors in a group. + + Parameters: + hed_groups (list): A list of the HED Groups to check. + + Returns: + list: list of issues + + """ + group = hed_groups.pop() + all_tags = group.get_all_tags() + event_tags = [tag.short_base_tag for tag in all_tags if tag.short_base_tag in self.EVENT_TAGS] + if not event_tags: + return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_EVENT_TYPE, + string=str(group), line=self.line_number) + if len(event_tags) == 1: + return self._check_task_role(group, event_tags[0], all_tags) + + # At this point, we know we have multiple event tags in the group. 
+ if any(tag.short_base_tag in event_tags for tag in group.tags()): + return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.IMPROPER_TAG_GROUPING, + string=str(group), line=self.line_number, + event_types =', '.join(event_tags)) + hed_groups.extend(group.groups()) + return [] + + def _check_task_role(self, hed_group, event_tag, all_tags): + """ Check that a group with a single event tag has at least one task role tag. + + Parameters: + hed_group (HedGroup): The HED group to check (should have a single event tag). + event_tag (str): The single event tag associated with the group. + all_tags (list): A list of all the HedTag objects in the group. + + Returns: + list: list of issues + + .""" + + if event_tag in self.NON_TASK_EVENTS: + return [] + has_task_role = any(tag.short_base_tag in self.TASK_ROLES for tag in all_tags) + if has_task_role: + return [] + if event_tag == 'Agent-action' and any(tag.short_base_tag in self.ACTION_ROLES for tag in all_tags): + return [] + + if event_tag == 'Sensory-event' and any(tag.short_base_tag in self.STIMULUS_ROLES for tag in all_tags): + return [] + + return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_TASK_ROLE, + event_type=event_tag, string=str(hed_group), + line=self.line_number) + +class EventsSummary: + # Excluding tags for condition-variables and task -- these can be done separately if we want to. 
+ REMOVE_TYPES = ['Condition-variable', 'Task'] + # Tags organized by whether they are found with either of these + MATCH_TYPES = ['Experimental-stimulus', 'Participant-response', 'Cue', 'Feedback', 'Instructional', 'Sensory-event', 'Agent-action'] + + # If a tag has any of these as a parent, it is excluded + EXCLUDED_PARENTS = {'data-marker', 'data-resolution', 'quantitative-value', 'spatiotemporal-value', + 'statistical-value', 'informational-property', 'organizational-property', + 'grayscale', 'hsv-color', 'rgb-color', 'luminance', 'luminance-contrast', 'opacity', + 'task-effect-evidence', 'task-relationship', 'relation'} + + # If a tag has any of these as a parent, it is replaced by this parent only + CUTOFF_TAGS = {'blue-color', 'brown-color', 'cyan-color', 'gray-color', 'green-color', 'orange-color', + 'pink-color', 'purple-color', 'red-color', 'white-color', 'yellow-color', + 'visual-presentation'} + + # These tags are removed at the end as non-informational + FILTERED_TAGS = {'event', 'agent', 'action', 'move-body-part', 'item', 'biological-item', 'anatomical-item', + 'body-part', + 'lower-extremity-part', 'upper-extremity-part', 'head-part', 'torso-part', 'face-part', + 'language-item', 'object', 'geometric-object', + 'man-made-object', 'device', 'computing-device', 'io-device', 'input-device', 'output-device', + 'auditory-device', 'display-device', + 'recording-device', 'natural-object', 'document', 'media', 'media-clip', 'visualization', + 'property', 'agent-property', 'agent-state', + 'agent-cognitive-state', 'agent-emotional-state', 'agent-physiological-state', + 'agent-postural-state', + 'agent-task-role', 'agent-trait', + 'data-property', 'biological-artifact', 'nonbiological-artifact', + 'spatial-property', 'temporal-property', 'spectral-property', 'dara-source-type', 'data-value', + 'categorical-value', 'categorical-class-value', 'categorical-judgment-value', + 'categorical-level-value', 'categorical-location-value', 
'categorical-orientation-value', + 'physical-value', 'data-variability-attribute', 'environmental-property', 'sensory-property', + 'sensory-attribute', 'auditory-attribute', 'gustatory-attribute', 'olfactory-attribute', + 'tactile-attribute', 'visual-attribute', 'sensory-presentation', 'task-property', + 'task-action-type', + 'task-attentional-demand', 'task-event-role', 'task-stimulus-role'} + + def __init__(self, hed_schema, file, sidecar=None, name=None): + """ Constructor for the HedString class. + + Parameters: + hed_schema (HedSchema): The HedSchema object to use for the summary. + file (str or FileLike or pd.Dataframe): A tsv file to open. + sidecar (str or Sidecar or FileLike): A Sidecar or source file/filename. + name (str): The name to display for this file for error purposes. + + + """ + self._schema = hed_schema + self.name = name + if name is None and isinstance(file, str): + self.name = file + self.hed_objs = self._initialize_hed(file, sidecar, name) + self.group_error_lines = [] + self.missing_error_lines = [] + + def _initialize_hed(self, file, sidecar, name): + input_data = TabularInput(file, sidecar, name=name) + event_manager = EventManager(input_data, self._schema) + tag_man = HedTagManager(event_manager, remove_types=self.REMOVE_TYPES) + return tag_man.get_hed_objs(include_context=False, replace_defs=True) + + def validate_event_tags(self): + """ Verify that the events in the HED strings validly represent events. + + Returns: + dict: A dictionary with the summary information. + set: A set of tags that do not match any of the specified types but are not excluded. 
+ """ + all_issues = [] + error_handler = ErrorHandler() + error_handler.push_error_context(ErrorContext.FILE_NAME, self.name) + for index, hed_obj in enumerate(self.hed_objs): + if not hed_obj: + continue + event_check = EventChecker(hed_obj, index, error_handler) + if event_check.group_error: + self.group_error_lines.append(index) + if event_check.issues: + self.missing_error_lines.append(index) + all_issues += event_check.issues + return all_issues + + def extract_tag_summary(self): + """ Extract a summary of the tags in a given tabular input file. + + Returns: + dict: A dictionary with the summary information - (str, list) + list: A set of tags that do not match any of the specified types but are not excluded. + """ + + group_dict = {key: set() for key in self.MATCH_TYPES} + other = set() + + for index, hed_obj in enumerate(self.hed_objs): + if not hed_obj or index in self.group_error_lines: + continue + all_tags = hed_obj.get_all_tags() + if index in self.missing_error_lines: + other = self.update_tags(other, all_tags) + continue + found = False + for key, tags in group_dict.items(): + if self.match_tags(all_tags, key): + group_dict[key] = self.update_tags(group_dict[key], all_tags) + found = True + break + if not found: + other = self.update_tags(other, all_tags) + + for key, tags in group_dict.items(): + group_dict[key] = sorted(tags - self.FILTERED_TAGS) + other = sorted(other - self.FILTERED_TAGS) + return group_dict, other + + @staticmethod + def match_tags(all_tags, key): + return any(tag.short_base_tag == key for tag in all_tags) + + def update_tags(self, tag_set, all_tags): + for tag in all_tags: + terms = tag.tag_terms + if any(item in self.EXCLUDED_PARENTS for item in terms): + continue + match = next((item for item in terms if item in self.CUTOFF_TAGS), None) + if match: + tag_set.add(match) + else: + tag_set.update(tag.tag_terms) + return tag_set + + +if __name__ == '__main__': + schema = load_schema_version('8.4.0') + + # # Wakeman Henson example 
+ # root_dir = 'g:/HEDExamples/hed-examples/datasets/eeg_ds003645s_hed' + # sidecar_path = os.path.join(root_dir, 'task-FacePerception_events.json') + # tsv_path = os.path.join(root_dir, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv') + # data_name = 'eeg_ds003645s_hed' + + # # Attention shift example + # root_dir = 'g:/HEDExamples/hed-examples/datasets/eeg_ds002893s_hed_attention_shift' + # sidecar_path = os.path.join(root_dir, 'task-AuditoryVisualShift_events.json') + # tsv_path = os.path.join(root_dir, 'sub-002/eeg/sub-002_task-AuditoryVisualShift_run-01_events.tsv') + # data_name = 'eeg_ds002893s_hed_attention_shift' + + # Sternberg example + root_dir = 'g:/HEDExamples/hed-examples/datasets/eeg_ds004117s_hed_sternberg' + sidecar_path = os.path.join(root_dir, 'task-WorkingMemory_events.json') + tsv_path = os.path.join(root_dir, 'sub-001/ses-01/eeg/sub-001_ses-01_task-WorkingMemory_run-1_events.tsv') + data_name = 'eeg_ds004117s_hed_sternberg' + + # Create the event summary + events_summary = EventsSummary(schema, tsv_path, sidecar_path, data_name) + + # Check the validity of the event tags + issues = events_summary.validate_event_tags() + if issues: + print(f"Errors found in {get_printable_issue_string(issues, '')}") + else: + print(f"No errors found in {data_name}.") + + # Extract the tag summary + tag_dict, others = events_summary.extract_tag_summary() + + for the_key, the_item in tag_dict.items(): + if not the_item: + continue + print(f"{the_key}:") + for tag in the_item: + print(f" {tag}") + + print("Other:") + for tag in others: + print(f" {tag}") \ No newline at end of file diff --git a/hed/tools/analysis/tag_summary_util.py b/hed/tools/analysis/tag_summary_util.py deleted file mode 100644 index a3c856889..000000000 --- a/hed/tools/analysis/tag_summary_util.py +++ /dev/null @@ -1,115 +0,0 @@ -import os -from hed import load_schema_version -from hed.tools.analysis.event_manager import EventManager -from hed.models.tabular_input import 
TabularInput -from hed.tools.analysis.hed_tag_manager import HedTagManager - -# Excluding tags for condition-variables and task -- these can be done separately if we want to. -REMOVE_TYPES = ['Condition-variable', 'Task'] - -# Tags organized by whether they are found with either of these -MATCH_TYPES = ['Experimental-stimulus', 'Participant-response', 'Incidental', 'Instructional', 'Mishap', - 'Task-activity', 'Warning', 'Sensory-event', 'Agent-action'] - -# If a tag has any of these as a parent, it is excluded -EXCLUDED_PARENTS = {'data-marker', 'data-resolution', 'quantitative-value', 'spatiotemporal-value', - 'statistical-value', 'informational-property', 'organizational-property', - 'grayscale', 'hsv-color', 'rgb-color', 'luminance', 'luminance-contrast', 'opacity', - 'task-effect-evidence', 'task-relationship', 'relation'} - -# If a tag has any of these as a parent, it is replaced by this parent only -CUTOFF_TAGS = {'blue-color', 'brown-color', 'cyan-color', 'gray-color', 'green-color', 'orange-color', - 'pink-color', 'purple-color', 'red-color', 'white-color', 'yellow-color', - 'visual-presentation'} - -# These tags are removed at the end as non-informational -FILTERED_TAGS = {'event', 'agent', 'action', 'move-body-part', 'item', 'biological-item', 'anatomical-item', 'body-part', - 'lower-extremity-part', 'upper-extremity-part', 'head-part', 'torso-part', 'face-part', - 'language-item', 'object', 'geometric-object', - 'man-made-object', 'device', 'computing-device', 'io-device', 'input-device', 'output-device', - 'auditory-device', 'display-device', - 'recording-device', 'natural-object', 'document', 'media', 'media-clip', 'visualization', - 'property', 'agent-property', 'agent-state', - 'agent-cognitive-state', 'agent-emotional-state', 'agent-physiological-state', 'agent-postural-state', - 'agent-task-role', 'agent-trait', - 'data-property', 'biological-artifact', 'nonbiological-artifact', - 'spatial-property', 'temporal-property', 'spectral-property', 
'dara-source-type', 'data-value', - 'categorical-value', 'categorical-class-value', 'categorical-judgment-value', - 'categorical-level-value', 'categorical-location-value', 'categorical-orientation-value', - 'physical-value', 'data-variability-attribute', 'environmental-property', 'sensory-property', - 'sensory-attribute', 'auditory-attribute', 'gustatory-attribute', 'olfactory-attribute', - 'tactile-attribute', 'visual-attribute', 'sensory-presentation', 'task-property', 'task-action-type', - 'task-attentional-demand', 'task-event-role', 'task-stimulus-role'} - -def extract_tag_summary(hed_schema, tsv_file, sidecar_file=None, name=None): - """ Extract a summary of the tags in a given tabular input file. - Parameters: - hed_schema (HedSchema): The HedSchema object to use for the summary. - tsv_file(str): The path of the tsv file - sidecar_file (str): The sidecar file to use for the summary. - name (str): The name of the summary. - - Returns: - dict: A dictionary with the summary information. 
- """ - - group_dict = {key: set() for key in MATCH_TYPES} - other = set() - input_data = TabularInput(tsv_file, sidecar=sidecar_file, name=name) - event_manager = EventManager(input_data, hed_schema) - tag_man = HedTagManager(event_manager, remove_types=REMOVE_TYPES) - hed_objs = tag_man.get_hed_objs(include_context=False, replace_defs=True) - for hed in hed_objs: - if not hed: - continue - all_tags = hed.get_all_tags() - found = False - for key, tags in group_dict.items(): - if match_tags(all_tags, key): - group_dict[key] = update_tags(group_dict[key], all_tags) - found = True - break - if not found: - other = update_tags(other, all_tags) - - for key, tags in group_dict.items(): - group_dict[key] = tags - FILTERED_TAGS - other = other - FILTERED_TAGS - return group_dict, other - - -def match_tags(all_tags, key): - return any(tag.short_base_tag == key for tag in all_tags) - - -def update_tags(tag_set, all_tags): - for tag in all_tags: - terms = tag.tag_terms - if any(item in EXCLUDED_PARENTS for item in terms): - continue - match = next((item for item in terms if item in CUTOFF_TAGS), None) - if match: - tag_set.add(match) - else: - tag_set.update(tag.tag_terms) - return tag_set - - -if __name__ == '__main__': - schema = load_schema_version('8.4.0') - root_dir = 'g:/HEDExamples/hed-examples/datasets/eeg_ds003645s_hed' - sidecar_path = os.path.join(root_dir, 'task-FacePerception_events.json') - tsv_path = os.path.join(root_dir, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv') - - tag_dict, others = extract_tag_summary(schema, tsv_path, sidecar_file=sidecar_path, name='eeg_ds003645s_hed') - - for the_key, the_item in tag_dict.items(): - if not the_item: - continue - print(f"{the_key}:") - for tag in the_item: - print(f" {tag}") - - print("Other:") - for tag in others: - print(f" {tag}") \ No newline at end of file diff --git a/hed/tools/analysis/tag_summary_util.zip b/hed/tools/analysis/tag_summary_util.zip deleted file mode 100644 index 
ced22a82d07edacb349ea6013d68d99849664300..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2181 zcma);XE+-Q7snF>HEV0dZi!NRl<38^x1efVgi@knB?ygFuMTR*s;VGX)vg_B?bXs! zyNF$@s1Y@a+rA&(_kMiO|9Q@M&i{NmUw&4mv~=76#$Q^FAvgm566R@kfMF|_)4Mm1(>qL)!#msFDRHWi2PX_aqg9AV zVyH?*G+*bX|0!>zX&yNsVMLzcjeV?bTH7C|_xj3mjKEUMkC(b8L~N#mGJ7?~2)BZp z?=2ebRzOx()*Gn#N}5RAEFUBHBdfVsKmBO8FB!!^P>$oOi_gXxq>&N_e>RaH246Kx zton1|zGZBMDv3K_e8>DYD#dg(q@v}`L1(B@b3dz5)8eQ?O9Sz*Js&tGf+|^m>^R~q zb*pQR?CFHG&$}fh=$*3Fl#I1u{?K@BJ~F}s@qXIEzKyA3!l24px~u zU%=MSXs=A){;pG8x32P_!4rJi8Ui-Q@z=UH-i3QKDtU1d+Z1&OT+7tab=S%V$IAoj z=4U%+!|N+5uTQT_H7z1K2J+>56Vh`FE1(OE1?TA8CNy}7XD*H;zKLX}+oUsK;g=b- zOr>6SBD1Pq?@p0PW+J|tr??~TDNoE!iQk~2H)yb)hix>fZCl^|xQ;G27PLWj>(8iI z2i}VAqd7=Zn#R`!e_%dhk&N54^=%*q%!#2}48=(tDlxvHZ?1lI%f_B-P2bgX+H3Er zn*z4^E0wQzP)9dh$ApE1cB!XYWIi?cFl;D($kOL8UIwA^LHiw(Xm2iT$!p-6GTNy` zsAm}I^+jXho}RiaA>_0EeFoV0@tWeU%a;T!>w%S@dxOek^g5eF9Y;JR%B@j%Ljib+ z?ZC!LN`m9pPrh#zQ`fmVPvAwOrI^tN+!4>ky-7pMq9D^@q2Adf^{b4|uVqF}G=uif0!_z)tk2)qG4J!(0Nxh;zo4QufQT2qOk2 zXDMS(W$>n&2vN5j`o%kRu=wdJlrl0VeIjeu9^jJIo2!E^|o*OfZX{^}a|7 z@(TUvX5D<_+Me!SbWA;{MUcysU%T`qd!o7vxQnH zEghGs^~_b*t*Nze48(q?Khybet#k%OhOkIHquUHr#H)<4Zi!T?Whb)d9enoYd6F3u z)`8vewG^~4*M2IKZB@#azx}A}Ufg5xU3EJ)X?f-3zRf`viCWn1E8YF_P)@s33`mvQ z7DSLE1hM5}zoyCOlm-EE?G%<+l8^HSZA#SB+&@%>&_XX4Ajex}f}hXBHx19!Q6V}0 zI$i2D^77pBC5t{sN@ks?BFv(eKcCC{tSkZk`U@O)|BmF70L{`C{vFTsI@;ZB-wU^I zIMJWq+RxO=<{}UaJAy$}qd3l$A}F#|EpOZgu*;81UIyC@<|e+35}Ax1D^-bVQj641 z*rtbV66h|=b$+Q?waXn|%a|m#Elkz2Y4!}C{geb!VOL~4dMG9S6!v>kEIge_KD^7-8OGlj)PJ=(PS^)XA1LZ0Ge_51 z#wokqCqnyHI|y7W>X6P(u6L(cHmWZrIaipEZ$zaRtkhgi5@&DkfFYhjV-b$xBwJ_h z0CEND{qw7DBVa{@k^60X9fk||WP627u!b<+4;}F&g$E8c1`u^7xS^l&yi^QR7xRa$ zj#({!vT>rX-!M7cFl^q_$(ph$aG_ORSljAmJj1wala$h4Cw{hPA(}r=v^ZWIQ@@o~ zQRu5kv_y5C6K=XgD#b^dwBp;3Ui`!h)6i(P->u4+%vP114KQ`AS;kvuKD#JWJ=cAF zpSL>g)jl)dBtqfE8l#vZ^T>*XhxwC>wFC?kNDn2}+P5dzc4AHn2YVJv>4X3in05oc zCRqr65dKm1*PoWF1<_r0nZ34ChIZC 
z4mRiSdI7dv__q0EWA?#eR+mNw9U;%cM)(n9{Uw~IYprb!!C1>oQ+w?M0?$QO4=vdS zArHFRt%h%VtzK zT+%|9ZG(IYjCTNjO>!dN6*^s$zM5UQ;9075>R!-ba7}HBf@O|-C+Fu^+;wwl8Ps|9 z$DP@^xgS|Nn3ljj;D8F;07FTLwK1kII0sspGB65H3Y^w4001()0DzS#kcOM~e_ZhI n9T$NAzm!n_@OS(FPyz@5{7nf~rXc#?C%|8F_*Hose^37aqwEK7 diff --git a/requirements.txt b/requirements.txt index 2a1c87a8c..52d16dac3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -defusedxml>=0.7.1 -inflect>=6.0.5 -jsonschema>=4.17.3 -matplotlib>=3 -numpy>=1.21.6 -openpyxl>=3.1.0 -pandas>=1.3.5 -pillow>=9.5.0 -portalocker>=2.7.0 -semantic-version>=2.10.0 -wordcloud>=1.9.3 +defusedxml>=0.7.1 +inflect>=7.5.0 +jsonschema>=4.23.0 +matplotlib==3.9.4 +numpy==2.0.2 +openpyxl>=3.1.5 +pandas>=2.2.3 +pillow>=11.2.1 +portalocker>=3.1.1 +semantic-version>=2.10.0 +wordcloud>=1.9.4 diff --git a/tests/tools/analysis/test_event_checker.py b/tests/tools/analysis/test_event_checker.py new file mode 100644 index 000000000..636191966 --- /dev/null +++ b/tests/tools/analysis/test_event_checker.py @@ -0,0 +1,80 @@ +import unittest +from hed.errors.error_types import TagQualityErrors +from hed.schema import load_schema_version +from hed.models.hed_string import HedString +from hed.tools.analysis.event_checker import EventChecker + + +class TestEventChecker(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version('8.3.0') + + def test_no_event_tag(self): + hed_strings = ['Action, (Participant-response, Red)'] + for hed_string in hed_strings: + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 0) + self.assertEqual(checker.issues[0]["code"], TagQualityErrors.MISSING_EVENT_TYPE) + + def test_event_without_task_role(self): + hed_strings = ['Sensory-event, (Red, Blue)', '((Agent-action, Red))'] + for hed_string in hed_strings: + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 2) + self.assertEqual(checker.issues[0]["code"], 
TagQualityErrors.MISSING_TASK_ROLE) + + def test_event_with_task_role(self): + hed_strings = ['(Sensory-event, (Experimental-stimulus, Blue, Green))', + '((Agent-action, Participant-response, Red))'] + for hed_string in hed_strings: + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 2) + self.assertEqual(checker.issues, []) + + def test_improperly_grouped_event_tags(self): + hed_strings = ['Sensory-event, (Red, Blue), Experiment-control', + '((Sensory-event, (Red, Blue), Experiment-control))'] + for hed_string in hed_strings: + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 2) + self.assertEqual(checker.issues[0]["code"], TagQualityErrors.IMPROPER_TAG_GROUPING) + + def test_nested_group_with_event_and_task_role(self): + hed_strings = ['Sensory-event, ((Experimental-stimulus, Red))', '(Experiment-control, Incidental)'] + for hed_string in hed_strings: + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 5) + self.assertEqual(checker.issues, []) + + def test_empty_hed_string(self): + checker = EventChecker(None, 6) + self.assertEqual(checker.issues, []) + + def test_flat_event_with_task_role(self): + hed_string = 'Agent-action, Participant-response, Red' + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 7) + self.assertEqual(checker.issues, []) + + def test_task_role_without_event(self): + hed_string = '(Experimental-stimulus, Green)' + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 8) + self.assertEqual(checker.issues[0]["code"], TagQualityErrors.MISSING_EVENT_TYPE) + + def test_multiple_event_tags_mixed_grouping(self): + hed_string = 'Sensory-event, (Agent-action, Instructional)' + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 9) + self.assertEqual(checker.issues[0]["code"], 
TagQualityErrors.IMPROPER_TAG_GROUPING) + + def test_empty_nested_group(self): + hed_string = '(())' + hed_obj = HedString(hed_string, hed_schema=self.hed_schema) + checker = EventChecker(hed_obj, 10) + self.assertEqual(checker.issues[0]["code"], TagQualityErrors.MISSING_EVENT_TYPE) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From b5df3cf3340bd771f64255f481b0bf815dd774dd Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 5 May 2025 09:45:29 -0500 Subject: [PATCH 09/10] Removed 3.8 from support --- .github/workflows/ci.yaml | 5 +- .github/workflows/test_installer.yaml | 88 +++++++++++++-------------- 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e348824c6..70b48382f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,10 +16,10 @@ jobs: run: | if [[ "${{ github.event_name }}" == 'push' && "${{ github.ref }}" == 'refs/heads/master' ]]; then # Push to master branch - echo 'matrix=["3.8", "3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT + echo 'matrix=["3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT elif [[ "${{ github.event_name }}" == 'pull_request' && "${{ github.event.pull_request.base.ref }}" == 'master' ]]; then # PR to master branch - echo 'matrix=["3.8", "3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT + echo 'matrix=["3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT else echo 'matrix=["3.9", "3.11"]' >> $GITHUB_OUTPUT fi @@ -42,7 +42,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: false - uses: actions/cache@v4 with: diff --git a/.github/workflows/test_installer.yaml b/.github/workflows/test_installer.yaml index 31526b7ac..4be55c0cc 100644 --- a/.github/workflows/test_installer.yaml +++ b/.github/workflows/test_installer.yaml @@ -1,44 +1,44 @@ -on: - push: - branches: ["develop"] - pull_request: - branches: ["develop"] - -jobs: - build: - runs-on: 
ubuntu-latest - - strategy: - matrix: - python-version: ["3.8", "3.11"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Create work directory - run: | - mkdir workdir - echo "WORKDIR=$(pwd)/workdir" >> $GITHUB_OUTPUT - - - name: Create and activate virtual environment - run: | - cd $WORKDIR - python -m venv .venv - source .venv/bin/activate - - - name: Install package - run: | - cd $WORKDIR - source .venv/bin/activate - python -m pip install --upgrade pip - pip install $GITHUB_WORKSPACE - - - name: Run post-installation test - run: | - cd $WORKDIR - source .venv/bin/activate - python -c "from hed.models.hed_string import HedString; print('Import test passed.')" +on: + push: + branches: ["develop"] + pull_request: + branches: ["develop"] + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: ["3.9", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Create work directory + run: | + mkdir workdir + echo "WORKDIR=$(pwd)/workdir" >> $GITHUB_OUTPUT + + - name: Create and activate virtual environment + run: | + cd $WORKDIR + python -m venv .venv + source .venv/bin/activate + + - name: Install package + run: | + cd $WORKDIR + source .venv/bin/activate + python -m pip install --upgrade pip + pip install $GITHUB_WORKSPACE + + - name: Run post-installation test + run: | + cd $WORKDIR + source .venv/bin/activate + python -c "from hed.models.hed_string import HedString; print('Import test passed.')" From a22237cbac38c1822479886ae89cb410eb91fc53 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 5 May 2025 09:57:10 -0500 Subject: [PATCH 10/10] Updated the python versions --- .github/workflows/ci.yaml | 6 +++--- 
.github/workflows/ci_cov.yaml | 3 +-- .github/workflows/ci_windows.yaml | 2 +- pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 70b48382f..6a737758c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,12 +16,12 @@ jobs: run: | if [[ "${{ github.event_name }}" == 'push' && "${{ github.ref }}" == 'refs/heads/master' ]]; then # Push to master branch - echo 'matrix=["3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT + echo 'matrix=["3.9", "3.10", "3.11", "3.12", "3.13"]' >> $GITHUB_OUTPUT elif [[ "${{ github.event_name }}" == 'pull_request' && "${{ github.event.pull_request.base.ref }}" == 'master' ]]; then # PR to master branch - echo 'matrix=["3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT + echo 'matrix=["3.9", "3.10", "3.11", "3.12", "3.13"]' >> $GITHUB_OUTPUT else - echo 'matrix=["3.9", "3.11"]' >> $GITHUB_OUTPUT + echo 'matrix=["3.9", "3.13"]' >> $GITHUB_OUTPUT fi build: diff --git a/.github/workflows/ci_cov.yaml b/.github/workflows/ci_cov.yaml index c29ab3a47..31f3c9946 100644 --- a/.github/workflows/ci_cov.yaml +++ b/.github/workflows/ci_cov.yaml @@ -29,7 +29,7 @@ jobs: strategy: matrix: platform: [ubuntu-latest] - python-version: [ "3.9" ] + python-version: [ "3.12" ] runs-on: ${{ matrix.platform }} @@ -42,7 +42,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: false # Install dependencies - name: Install dependencies diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index 750f754c3..df667792e 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -11,7 +11,7 @@ jobs: strategy: matrix: platform: [windows-latest] - python-version: ["3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.platform }} diff --git a/pyproject.toml b/pyproject.toml index 9765045bd..dbc77e9c1 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ "Operating System :: OS Independent", ] -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "defusedxml",