Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 10 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,27 @@ which can be installed into a Spacy pipeline. They annotate the Spacy
parse tree with additional attributes that make it easy to summarize
information about features of student writing.

Before You Install
------------

Note that AWE Components is best tested using [AWE_Workbench](https://github.com/ArgLab/AWE_Workbench), which uses the features defined in AWE Components. There is a series of automated tests which can be run to verify or validate AWE Components; in addition, there are examples, a web server for parsing documents, and an interactive document highlighting tool for visualizing the document features which are derived from AWE Components.

See AWE Workbench's installation steps and decide whether you would rather use it instead of installing AWE Components directly.

Installation
------------

Set up Python 3.9. 3.8 will *not* work. If you wish to use `conda`:
Set up Python 3.11. If you wish to use `conda`:

conda create -n test_install python=3.9 pip
conda create -n test_install python=3.11 pip
pip install pip --upgrade
conda activate test_install

If you wish to use plain old `pip` with `virtualenvwrapper`:

mkvirtualenv awe_components --python=/usr/bin/python3.9
mkvirtualenv awe_components --python=/usr/bin/python3.11
pip install pip --upgrade

Install prerequisites:

[Holmes Extractor Expandable](https://github.com/ETS-Next-Gen/holmes-extractor-expandable):

git clone git@github.com:ETS-Next-Gen/holmes-extractor-expandable.git
cd holmes-extractor-expandable/
pip install .

[AWE Language Tool](https://github.com/ETS-Next-Gen/AWE_LanguageTool):

git clone git@github.com:ETS-Next-Gen/AWE_LanguageTool.git
cd AWE_LanguageTool/
pip install .

[AWE Spell Correct](https://github.com/ETS-Next-Gen/AWE_SpellCorrect):

git clone git@github.com:ETS-Next-Gen/AWE_SpellCorrect.git
cd AWE_SpellCorrect/
pip install .

[AWE Lexica](https://github.com/ETS-Next-Gen/AWE_Lexica):

git clone git@github.com:ETS-Next-Gen/AWE_Lexica.git
cd AWE_Lexica/
pip install .

Then from the AWE Workbench Components directory:

pip install .
Expand Down
9 changes: 6 additions & 3 deletions awe_components/components/contentSegmentation.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#!/usr/bin/env python3
# Copyright 2022, Educational Testing Service

from .utility_functions import *
from .utility_functions import \
match_related_form, getRoot, \
in_past_tense_scope, newSpanEntry, \
AWE_Info

from operator import itemgetter
import spacy
from spacy.tokens import Token, Doc
from spacy.tokens import Doc
from spacy.language import Language
import wordfreq

Expand Down
12 changes: 3 additions & 9 deletions awe_components/components/lexicalClusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,18 @@
# Copyright 2022, Educational Testing Service

import re
import spacy
import srsly
import json
import wordfreq
import numpy as np
import os
from collections import OrderedDict

from scipy.spatial.distance import cosine
# Standard cosine distance metric

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering

from spacy.tokens import Token, Doc
from spacy.language import Language

from .utility_functions import *
from ..errors import *
from .utility_functions import ResolveReference, all_zeros, AWE_Info

lang = "en"

Expand Down Expand Up @@ -480,7 +474,7 @@ def devword(token):
# flag assignClusterIDs to run
# by setting it to a non None value
token.doc._.clusterInfo_ = []
self.assignClusterIDs(token.doc)
assignClusterIDs(token.doc)
devlist = [token.text \
for token \
in developmentContentWords(token.doc)]
Expand Down
18 changes: 12 additions & 6 deletions awe_components/components/lexicalFeatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,14 @@

import importlib.resources
import math
import numpy as np
import os
import re
from varname import nameof

# English dictionary. Contains information on senses associated with words
# (a lot more, but that's what we're currently using it for)
from nltk.corpus import wordnet
from scipy.spatial.distance import cosine # Standard cosine distance metric
from spacy.language import Language
from spacy.tokens import Doc, Span, Token
from spacy.vocab import Vocab
from spacy.tokens import Doc, Token
import srsly
import statistics
# https://github.com/rspeer/wordfreq
Expand All @@ -41,7 +37,17 @@

import awe_lexica

from .utility_functions import * # <-- Paul, import only what you need here
from .utility_functions import \
setExtensionFunctions, alphanum_word, \
sylco, content_tags, \
ResolveReference, AWE_Info, \
possessive_or_determiner, personal_or_indefinite_pronoun, \
all_zeros, is_temporal, \
locative_adverbs, existential_there, \
major_locative_prepositions, all_locative_prepositions, \
loc_sverbs, loc_overbs, \
deictics

from ..errors import LexiconMissingError

def lexicon_path(lexicon):
Expand Down
42 changes: 20 additions & 22 deletions awe_components/components/syntaxDiscourseFeats.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
#!/usr/bin/env python3
# Copyright 2022, Educational Testing Service

import math
import os
import srsly
from varname import nameof

from enum import Enum
from spacy.tokens import Doc, Span, Token
from spacy.tokens import Doc, Token
from spacy.language import Language

from scipy.spatial.distance import cosine
# Standard cosine distance metric

from .utility_functions import *
from ..errors import *
from importlib import resources

from nltk.corpus import wordnet
# English dictionary. Contains information on senses associated with words
# (a lot more, but that's what we're currently using it for)
from .utility_functions import \
setExtensionFunctions, AWE_Info, \
in_past_tense_scope, getRoot, \
temporalPhrase, newSpanEntry, \
adj_noun_or_verb, content_tags, \
possessive_or_determiner, ResolveReference, \
tensed_clause

from importlib import resources
from ..errors import LexiconMissingError

@Language.factory("syntaxdiscoursefeatures")
def SyntaxAndDiscourseFeatures(nlp, name):
Expand All @@ -45,21 +44,20 @@ class SyntaxAndDiscourseFeatDef(object):
) as filepath:
TRANSITION_CATEGORIES_PATH = filepath

datapaths = [{'pathname': nameof(TRANSITION_TERMS_PATH),
'value': TRANSITION_TERMS_PATH},
{'pathname': nameof(TRANSITION_CATEGORIES_PATH),
'value': TRANSITION_CATEGORIES_PATH}]

transition_terms = {}
transition_categories = {}

def package_check(self, lang):
for path in self.datapaths:
if not os.path.exists(path['value']):
raise LexiconMissingError(
"Trying to load AWE Workbench Lexicon Module \
without {name} datafile".format(name=path['pathname'])
)
if not os.path.exists(self.TRANSITION_TERMS_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.TRANSITION_TERMS_PATH)
)
if not os.path.exists(self.TRANSITION_CATEGORIES_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.TRANSITION_CATEGORIES_PATH)
)

def load_lexicons(self, lang):
self.transition_terms = \
Expand Down
102 changes: 93 additions & 9 deletions awe_components/components/viewpointFeatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@

import os
import srsly
import imp

from enum import Enum
from collections import OrderedDict
from spacy.tokens import Doc, Span, Token
from spacy.language import Language

Expand All @@ -16,8 +13,95 @@
from nltk.corpus import wordnet
# (a lot more, but that's what we're currently using it for)

from .utility_functions import *
from ..errors import *
from .utility_functions import \
AWE_Info, \
absolute_degree, \
adjectival_complement_dependencies, \
adjectival_mod_dependencies , \
adjectival_predicates, \
animate_ent_type , \
auxiliary_dependencies, \
auxiliary_or_adverb, \
be_verbs , \
clausal_complements , \
clausal_modifier_dependencies , \
clausal_subject_or_complement, \
common_evaluation_adjective, \
common_hedge_word, \
complements , \
containsDistinctReference, \
content_pos , \
contracted_verb, \
contraction, \
core_temporal_preps , \
coreViewpointPredicate, \
dative_preps , \
demonstratives , \
elliptical_verb, \
emphatic_adjective, \
emphatic_adjective, \
emphatic_adverb, \
first_person_pronouns , \
function_word_tags , \
generalArgumentPredicate, \
general_complements_and_modifiers , \
generalViewpointPredicate, \
getDative, \
getLightVerbs, \
getLinkedNodes, \
getLogicalObject, \
getObject, \
getPrepObject, \
getRoot, \
getRoots, \
getSubject, \
getSubject, \
getTensedVerbHead, \
illocutionary_tag, \
inanimate_3sg_pronouns, \
indefinite_comparison, \
indefinite_pronoun , \
in_modal_scope, \
in_past_tense_scope, \
is_definite_nominal, \
isRoot, \
loose_clausal_dependencies , \
newSpanEntry, \
newTokenEntry, \
nominal_pos , \
nonhuman_ent_type , \
object_predicate_dependencies , \
object_predicate_dependencies, \
other_conversational_idioms, \
other_conversational_vocabulary, \
personal_or_indefinite_pronoun , \
personal_or_indefinite_pronoun , \
pos_degree_mod , \
prehead_modifiers2 , \
present_semimodals , \
private_mental_state_tag, \
quantifying_determiners, \
quotationMark, \
raising_complement, \
ResolveReference, \
rootTree, \
scanForAnimatePotentialAntecedents, \
second_person_pronouns , \
setExtensionFunctions, \
stance_adverb, \
stancePredicate, \
subject_dependencies , \
subject_or_object_nom , \
takesBareInfinitive, \
tensed_clause, \
third_person_pronouns , \
tough_complement, \
underlying_object_dependencies , \
verbal_mod_dependencies , \
verbal_pos , \
wh_question_word

from ..errors import LexiconMissingError
from importlib import resources


Expand Down Expand Up @@ -65,13 +149,13 @@ class ViewpointFeatureDef:
def package_check(self, lang):
if not os.path.exists(self.STANCE_PERSPECTIVE_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntaxa and Discourse Feature \
Module without supporting datafile {}".format(filepath)
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.STANCE_PERSPECTIVE_PATH)
)
if not os.path.exists(self.MORPHOLEX_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntaxa and Discourse Feature \
Module without supporting datafile {}".format(filepath)
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.MORPHOLEX_PATH)
)

def load_lexicon(self, lang):
Expand Down
Loading