diff --git a/README.md b/README.md index 5511a71..f601dcb 100755 --- a/README.md +++ b/README.md @@ -6,46 +6,27 @@ which can be installed into a Spacy pipeline. They annotate the Spacy parse tree with additional attributes that make it easy to summarize information about features of student writing. +Before You Install +------------ + +It is helpful to note that the use of AWE Components is best tested using [AWE_Workbench](https://github.com/ArgLab/AWE_Workbench), which utilizes the features defined in AWE Components. There is a series of automatic tests which can be run to verify or validate AWE Components; in addition, there are examples, a web server for parsing documents, and an interactive document highlighting tool for visualizing the document features which are derived from AWE Components. + +See AWE Workbench's installation steps and decide whether you'd rather use it instead of installing AWE Components directly. + Installation ------------ -Set up Python 3.9. 3.8 will *not* work. If you wish to use `conda`: +Set up Python 3.11. If you wish to use `conda`: - conda create -n test_install python=3.9 pip + conda create -n test_install python=3.11 pip pip install pip --upgrade conda activate test_install If you wish to use plain old `pip` with `virtualenvwrapper`: - mkvirtualenv awe_components --python=/usr/bin/python3.9 + mkvirtualenv awe_components --python=/usr/bin/python3.11 pip install pip --upgrade -Install prerequisites: - -[Holmes Extractor Expandable](https://github.com/ETS-Next-Gen/holmes-extractor-expandable): - - git clone git@github.com:ETS-Next-Gen/holmes-extractor-expandable.git - cd holmes-extractor-expandable/~ - pip install . - -[AWE Language Tool](https://github.com/ETS-Next-Gen/AWE_LanguageTool): - - git clone git@github.com:ETS-Next-Gen/AWE_LanguageTool.git - cd AWE_LanguageTool/ - pip install . 
- -[AWE Spell Correct](https://github.com/ETS-Next-Gen/AWE_SpellCorrect) - - git clone git@github.com:ETS-Next-Gen/AWE_SpellCorrect.git - cd AWE_SpellCorrect/ - pip install . - -[AWE Lexica](https://github.com/ETS-Next-Gen/AWE_Lexica) - - git clone git@github.com:ETS-Next-Gen/AWE_Lexica.git - cd AWE_Lexica/ - pip install . - Then from the AWE Workbench Components directory: pip install . diff --git a/awe_components/components/contentSegmentation.py b/awe_components/components/contentSegmentation.py index 6895545..58903d9 100644 --- a/awe_components/components/contentSegmentation.py +++ b/awe_components/components/contentSegmentation.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 # Copyright 2022, Educational Testing Service -from .utility_functions import * +from .utility_functions import \ + match_related_form, getRoot, \ + in_past_tense_scope, newSpanEntry, \ + AWE_Info + from operator import itemgetter -import spacy -from spacy.tokens import Token, Doc +from spacy.tokens import Doc from spacy.language import Language import wordfreq diff --git a/awe_components/components/lexicalClusters.py b/awe_components/components/lexicalClusters.py index 42e898c..796ac19 100644 --- a/awe_components/components/lexicalClusters.py +++ b/awe_components/components/lexicalClusters.py @@ -2,24 +2,18 @@ # Copyright 2022, Educational Testing Service import re -import spacy -import srsly +import json import wordfreq import numpy as np -import os from collections import OrderedDict -from scipy.spatial.distance import cosine -# Standard cosine distance metric - from sklearn.preprocessing import StandardScaler from sklearn.cluster import AgglomerativeClustering from spacy.tokens import Token, Doc from spacy.language import Language -from .utility_functions import * -from ..errors import * +from .utility_functions import ResolveReference, all_zeros, AWE_Info lang = "en" @@ -480,7 +474,7 @@ def devword(token): # flag assignClusterIDs to run # by setting it to a non None value 
token.doc._.clusterInfo_ = [] - self.assignClusterIDs(token.doc) + assignClusterIDs(token.doc) devlist = [token.text \ for token \ in developmentContentWords(token.doc)] diff --git a/awe_components/components/lexicalFeatures.py b/awe_components/components/lexicalFeatures.py index 0de6b01..cc4e282 100644 --- a/awe_components/components/lexicalFeatures.py +++ b/awe_components/components/lexicalFeatures.py @@ -19,18 +19,14 @@ import importlib.resources import math -import numpy as np import os -import re -from varname import nameof # English dictionary. Contains information on senses associated with words # (a lot more, but that's what we're currently using it for) from nltk.corpus import wordnet from scipy.spatial.distance import cosine # Standard cosine distance metric from spacy.language import Language -from spacy.tokens import Doc, Span, Token -from spacy.vocab import Vocab +from spacy.tokens import Doc, Token import srsly import statistics # https://github.com/rspeer/wordfreq @@ -41,7 +37,17 @@ import awe_lexica -from .utility_functions import * # <-- Paul, import only what you need here +from .utility_functions import \ + setExtensionFunctions, alphanum_word, \ + sylco, content_tags, \ + ResolveReference, AWE_Info, \ + possessive_or_determiner, personal_or_indefinite_pronoun, \ + all_zeros, is_temporal, \ + locative_adverbs, existential_there, \ + major_locative_prepositions, all_locative_prepositions, \ + loc_sverbs, loc_overbs, \ + deictics + from ..errors import LexiconMissingError def lexicon_path(lexicon): diff --git a/awe_components/components/syntaxDiscourseFeats.py b/awe_components/components/syntaxDiscourseFeats.py index 7a0a63e..08d8acc 100644 --- a/awe_components/components/syntaxDiscourseFeats.py +++ b/awe_components/components/syntaxDiscourseFeats.py @@ -1,26 +1,25 @@ #!/usr/bin/env python3 # Copyright 2022, Educational Testing Service -import math import os import srsly -from varname import nameof -from enum import Enum -from spacy.tokens import 
Doc, Span, Token +from spacy.tokens import Doc, Token from spacy.language import Language from scipy.spatial.distance import cosine # Standard cosine distance metric -from .utility_functions import * -from ..errors import * -from importlib import resources - -from nltk.corpus import wordnet -# English dictionary. Contains information on senses associated with words -# (a lot more, but that's what we're currently using it for) +from .utility_functions import \ + setExtensionFunctions, AWE_Info, \ + in_past_tense_scope, getRoot, \ + temporalPhrase, newSpanEntry, \ + adj_noun_or_verb, content_tags, \ + possessive_or_determiner, ResolveReference, \ + tensed_clause +from importlib import resources +from ..errors import LexiconMissingError @Language.factory("syntaxdiscoursefeatures") def SyntaxAndDiscourseFeatures(nlp, name): @@ -45,21 +44,20 @@ class SyntaxAndDiscourseFeatDef(object): ) as filepath: TRANSITION_CATEGORIES_PATH = filepath - datapaths = [{'pathname': nameof(TRANSITION_TERMS_PATH), - 'value': TRANSITION_TERMS_PATH}, - {'pathname': nameof(TRANSITION_CATEGORIES_PATH), - 'value': TRANSITION_CATEGORIES_PATH}] - transition_terms = {} transition_categories = {} def package_check(self, lang): - for path in self.datapaths: - if not os.path.exists(path['value']): - raise LexiconMissingError( - "Trying to load AWE Workbench Lexicon Module \ - without {name} datafile".format(name=path['pathname']) - ) + if not os.path.exists(self.TRANSITION_TERMS_PATH): + raise LexiconMissingError( + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.TRANSITION_TERMS_PATH) + ) + if not os.path.exists(self.TRANSITION_CATEGORIES_PATH): + raise LexiconMissingError( + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.TRANSITION_CATEGORIES_PATH) + ) def load_lexicons(self, lang): self.transition_terms = \ diff --git a/awe_components/components/viewpointFeatures.py 
b/awe_components/components/viewpointFeatures.py index 92d10ef..bb64155 100644 --- a/awe_components/components/viewpointFeatures.py +++ b/awe_components/components/viewpointFeatures.py @@ -3,10 +3,7 @@ import os import srsly -import imp -from enum import Enum -from collections import OrderedDict from spacy.tokens import Doc, Span, Token from spacy.language import Language @@ -16,8 +13,95 @@ from nltk.corpus import wordnet # (a lot more, but that's what we're currently using it for) -from .utility_functions import * -from ..errors import * +from .utility_functions import \ + AWE_Info, \ + absolute_degree, \ + adjectival_complement_dependencies, \ + adjectival_mod_dependencies , \ + adjectival_predicates, \ + animate_ent_type , \ + auxiliary_dependencies, \ + auxiliary_or_adverb, \ + be_verbs , \ + clausal_complements , \ + clausal_modifier_dependencies , \ + clausal_subject_or_complement, \ + common_evaluation_adjective, \ + common_hedge_word, \ + complements , \ + containsDistinctReference, \ + content_pos , \ + contracted_verb, \ + contraction, \ + core_temporal_preps , \ + coreViewpointPredicate, \ + dative_preps , \ + demonstratives , \ + elliptical_verb, \ + emphatic_adjective, \ + emphatic_adjective, \ + emphatic_adverb, \ + first_person_pronouns , \ + function_word_tags , \ + generalArgumentPredicate, \ + general_complements_and_modifiers , \ + generalViewpointPredicate, \ + getDative, \ + getLightVerbs, \ + getLinkedNodes, \ + getLogicalObject, \ + getObject, \ + getPrepObject, \ + getRoot, \ + getRoots, \ + getSubject, \ + getSubject, \ + getTensedVerbHead, \ + illocutionary_tag, \ + inanimate_3sg_pronouns, \ + indefinite_comparison, \ + indefinite_pronoun , \ + in_modal_scope, \ + in_past_tense_scope, \ + is_definite_nominal, \ + isRoot, \ + loose_clausal_dependencies , \ + newSpanEntry, \ + newTokenEntry, \ + nominal_pos , \ + nonhuman_ent_type , \ + object_predicate_dependencies , \ + object_predicate_dependencies, \ + other_conversational_idioms, \ + 
other_conversational_vocabulary, \ + personal_or_indefinite_pronoun , \ + personal_or_indefinite_pronoun , \ + pos_degree_mod , \ + prehead_modifiers2 , \ + present_semimodals , \ + private_mental_state_tag, \ + quantifying_determiners, \ + quotationMark, \ + raising_complement, \ + ResolveReference, \ + rootTree, \ + scanForAnimatePotentialAntecedents, \ + second_person_pronouns , \ + setExtensionFunctions, \ + stance_adverb, \ + stancePredicate, \ + subject_dependencies , \ + subject_or_object_nom , \ + takesBareInfinitive, \ + tensed_clause, \ + third_person_pronouns , \ + tough_complement, \ + underlying_object_dependencies , \ + verbal_mod_dependencies , \ + verbal_pos , \ + wh_question_word + +from ..errors import LexiconMissingError from importlib import resources @@ -65,13 +149,13 @@ class ViewpointFeatureDef: def package_check(self, lang): if not os.path.exists(self.STANCE_PERSPECTIVE_PATH): raise LexiconMissingError( - "Trying to load AWE Workbench Syntaxa and Discourse Feature \ - Module without supporting datafile {}".format(filepath) + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.STANCE_PERSPECTIVE_PATH) ) if not os.path.exists(self.MORPHOLEX_PATH): raise LexiconMissingError( - "Trying to load AWE Workbench Syntaxa and Discourse Feature \ - Module without supporting datafile {}".format(filepath) + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.MORPHOLEX_PATH) ) def load_lexicon(self, lang): diff --git a/extensions.txt b/extensions.txt new file mode 100644 index 0000000..8dc3ff2 --- /dev/null +++ b/extensions.txt @@ -0,0 +1,61 @@ +AWE_Info +all_cluster_info +assessments +blob +clusterInfo +clusterInfo_ +concrete_details +corefChainInfo +coref_chains +direct_speech_spans +has_governing_subject +intersentence_cohesions +main_cluster_spans +main_cluster_spans_ +main_ideas +main_ideas_ +negation_tokens +nominalReferences 
+polarity +prompt +prompt_ +prompt_language +prompt_language_ +prompt_related +prompt_related_ +propositional_attitudes_ +sentenceThemes +sentence_types +sliding_window_cohesions +subjectivity +supporting_details +supporting_details_ +supporting_ideas +supporting_ideas_ +syntacticDepthsOfRhemes +syntacticDepthsOfThemes +syntacticProfile +syntacticProfileNormed +syntacticVariety +tense_changes +token_vectors +transition_distances +transition_word_profile +transition_word_profile_ +transitions +vwp_allocentric +vwp_argumentation +vwp_character_traits +vwp_direct_speech +vwp_egocentric +vwp_emotion_states +vwp_interactive +vwp_perspective_spans +vwp_perspective_spans_ +vwp_propositional_attitudes +vwp_quoted +vwp_social_awareness +vwp_stance_markers +vwp_stance_markers_ +vwp_statements_of_fact +vwp_statements_of_opinion diff --git a/setup.cfg b/setup.cfg index 66901e6..3bd4d44 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,7 +33,7 @@ cmdclass = install_requires = awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git spacy - coreferee + coreferee @ git+https://github.com/Arglab/coreferee.git@latest_spacy rdflib spacytextblob numpy==1.26.4