From d55833756e40e9e13748f8a3f11ce4cb8b954cc8 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Fri, 17 May 2024 10:50:16 -0400 Subject: [PATCH 01/39] Added gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5c31c44 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# Ignore the pycache and egg python directories +__pycache__/ +*.egg-info/ +*.egg From 651b9261d80093c846dc5ce53d79f29adb8c9b34 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Fri, 17 May 2024 11:01:37 -0400 Subject: [PATCH 02/39] Removed typo --- awe_workbench/web/parserServer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index a4737d0..41b5a5b 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -455,7 +455,6 @@ async def run_parser(self, websocket, path): command = 'LEMMAS' label = messagelist[1] doc = self.parser.get_document(label) -\ await websocket.send(doc._.AWE_Info(indicator='lemma_')) elif messagelist[0] == 'STOPWORDS': label = messagelist[1] From ca744c422daa22ae7669a77407319822383b8d27 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Mon, 20 May 2024 10:07:44 -0400 Subject: [PATCH 03/39] Added missing test for lexical features --- tests/test_lexical_features.py | 457 +++++++++++++++++++++++++++++++++ 1 file changed, 457 insertions(+) create mode 100644 tests/test_lexical_features.py diff --git a/tests/test_lexical_features.py b/tests/test_lexical_features.py new file mode 100644 index 0000000..7002cb4 --- /dev/null +++ b/tests/test_lexical_features.py @@ -0,0 +1,457 @@ +#!/usr/bin/env python3.10 +# Copyright 2022, Educational Testing Service + +import holmes_extractor.manager as holmes +import unittest +from awe_workbench.pipeline import pipeline_def + +holmes_manager = holmes.Manager( + 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, 
extra_components=pipeline_def) + +# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses +holmes_manager.parse_and_register_document( + document_text="The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. 
In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. 
Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", label='GRE_Sample_Essay') + + +class LexicalFeatureTest(unittest.TestCase): + + def test_lemmas(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + lemmas = ['the', 'statement', 'link', 'technology', 'negatively', 'with', 'free', 'thinking', 'play', 'on', 'recent', 'human', 'experience', 'over', 'the', 'past', 'century', None, 'surely', 'there', 'have', 'be', 'no', 'time', 'in', 'history', 'where', 'the', 'live', 'life', 'of', 'people', 'have', 'change', 'more', 'dramatically', None, 'a', 'quick', 'reflection', 'on', 'a', 'typical', 'day', 'reveal', 'how', 'technology', 'have', 'revolutionize', 'the', 'world', None, 'Most', 'people', 'commute', 'to', 'work', 'in', 'an', 'automobile', 'that', 'run', 'on', 'an', 'internal', 'combustion', 'engine', None, 'during', 'the', 'workday', None, 'chance', 'be', 'high', 'that', 'the', 'employee', 'will', 'interact', 'with', 'a', 'computer', 'that', 'process', 'information', 'on', 'silicon', 'bridge', 'that', 'be', None, 'micron', 'wide', None, 'upon', 'leave', 'home', None, 'family', 'member', 'will', 'be', 'reach', 'through', 'wireless', 'network', 'that', 'utilize', 'satellite', 'orbit', 'the', 'earth', None, 'each', 'of', 'these', 'common', 'occurrence', 'could', 'have', 'be', 'inconceivable', 'at', 'the', 'turn', 'of', 'the', '19th', 'century', None, None, 'the', 'statement', 'attempt', 'to', 'bridge', 'these', 'dramatic', 'change', 'to', 'a', 'reduction', 'in', 'the', 'ability', 
'for', 'human', 'to', 'think', 'for', 'themselves', None, 'the', 'assumption', 'be', 'that', 'an', 'increase', 'reliance', 'on', 'technology', 'negate', 'the', 'need', 'for', 'people', 'to', 'think', 'creatively', 'to', 'solve', 'previous', 'quandary', None, 'look', 'back', 'at', 'the', 'introduction', None, 'one', 'could', 'argue', 'that', 'without', 'a', 'car', None, 'computer', None, 'or', 'mobile', 'phone', None, 'the', 'hypothetical', 'worker', 'would', 'need', 'to', 'find', 'alternate', 'method', 'of', 'transport', None, 'information', 'processing', 'and', 'communication', None, 'technology', 'short', 'circuit', 'this', 'thinking', 'by', 'make', 'the', 'problem', 'obsolete', None, None, 'however', None, 'this', 'reliance', 'on', 'technology', 'do', 'not', 'necessarily', 'preclude', 'the', 'creativity', 'that', 'mark', 'the', 'human', 'specie', None, 'the', 'prior', 'example', 'reveal', 'that', 'technology', 'allow', 'for', 'convenience', None, 'the', 'car', None, 'computer', 'and', 'phone', 'all', 'release', 'additional', 'time', 'for', 'people', 'to', 'live', 'more', 'efficiently', None, 'this', 'efficiency', 'do', 'not', 'preclude', 'the', 'need', 'for', 'human', 'to', 'think', 'for', 'themselves', None, 'in', 'fact', None, 'technology', 'free', 'humanity', 'to', 'not', 'only', 'tackle', 'new', 'problem', None, 'but', 'may', 'itself', 'create', 'new', 'issue', 'that', 'do', 'not', 'exist', 'without', 'technology', None, 'for', 'example', None, 'the', 'proliferation', 'of', 'automobile', 'have', 'introduce', 'a', 'need', 'for', 'fuel', 'conservation', 'on', 'a', 'global', 'scale', None, 'with', 'increase', 'energy', 'demand', 'from', 'emerge', 'market', None, 'global', 'warming', 'become', 'a', 'concern', 'inconceivable', 'to', 'the', 'horse', None, 'and', None, 'buggy', 'generation', None, 'likewise', 'dependence', 'on', 'oil', 'have', 'create', 'nation', None, 'state', 'that', 'be', 'not', 'dependent', 'on', 'taxation', None, 'allow', 'rule', 'party', 
'to', 'oppress', 'minority', 'group', 'such', 'as', 'woman', None, 'solution', 'to', 'these', 'complex', 'problem', 'require', 'the', 'unfettered', 'imagination', 'of', 'maverick', 'scientist', 'and', 'politician', None, None, 'in', 'contrast', 'to', 'the', 'statement', None, 'we', 'can', 'even', 'see', 'how', 'technology', 'free', 'the', 'human', 'imagination', None, 'consider', 'how', 'the', 'digital', 'revolution', 'and', 'the', 'advent', 'of', 'the', 'internet', 'have', 'allow', 'for', 'an', 'unprecedented', 'exchange', 'of', 'idea', None, 'WebMD', None, 'a', 'popular', 'internet', 'portal', 'for', 'medical', 'information', None, 'permit', 'patient', 'to', 'self', 'research', 'symptom', 'for', 'a', 'more', 'informed', 'doctor', 'visit', None, 'this', 'exercise', 'open', 'pathway', 'of', 'thinking', 'that', 'be', 'previously', 'close', 'off', 'to', 'the', 'medical', 'layman', None, 'with', 'increase', 'interdisciplinary', 'interaction', None, 'inspiration', 'can', 'arrive', 'from', 'the', 'most', 'surprising', 'corner', None, 'Jeffrey', 'Sachs', None, 'one', 'of', 'the', 'architect', 'of', 'the', 'UN', 'Millenium', 'Development', 'Goals', None, 'base', 'his', 'idea', 'on', 'emergency', 'care', 'triage', 'technique', None, 'the', 'unlikely', 'marriage', 'of', 'economic', 'and', 'medicine', 'have', 'heal', 'tense', None, 'hyperinflation', 'environment', 'from', 'South', 'America', 'to', 'Eastern', 'Europe', None, None, 'this', 'last', 'example', 'provide', 'the', 'most', 'hope', 'in', 'how', 'technology', 'actually', 'provide', 'hope', 'to', 'the', 'future', 'of', 'humanity', None, 'by', 'increase', 'our', 'reliance', 'on', 'technology', None, 'impossible', 'goal', 'can', 'now', 'be', 'achieve', None, 'consider', 'how', 'the', 'late', '20th', 'century', 'witness', 'the', 'complete', 'elimination', 'of', 'smallpox', None, 'this', 'disease', 'have', 'ravage', 'the', 'human', 'race', 'since', 'prehistorical', 'day', None, 'and', 'yet', 'with', 'the', 'technology', 
'of', 'vaccine', None, 'free', 'thinking', 'human', 'dare', 'to', 'imagine', 'a', 'world', 'free', 'of', 'smallpox', None, 'use', 'technology', None, 'battle', 'plan', 'be', 'draw', 'out', None, 'and', 'smallpox', 'be', 'systematically', 'target', 'and', 'eradicate', None, None, 'Technology', 'will', 'always', 'mark', 'the', 'human', 'experience', None, 'from', 'the', 'discovery', 'of', 'fire', 'to', 'the', 'implementation', 'of', 'nanotechnology', None, 'give', 'the', 'history', 'of', 'the', 'human', 'race', None, 'there', 'will', 'be', 'no', 'limit', 'to', 'the', 'number', 'of', 'problem', None, 'both', 'new', 'and', 'old', None, 'for', 'we', 'to', 'tackle', None, 'there', 'be', 'no', 'need', 'to', 'retreat', 'to', 'a', 'Luddite', 'attitude', 'to', 'new', 'thing', None, 'but', 'rather', 'embrace', 'a', 'hopeful', 'posture', 'to', 'the', 'possibility', 'that', 'technology', 'provide', 'for', 'new', 'avenue', 'of', 'human', 'imagination', None] + self.assertEqual(doc._.lemmas,lemmas) + + def test_word_types(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + word_types = ['19th', '20th', 'A', 'America', 'By', 'Consider', 'Development', 'During', 'Each', 'Eastern', 'Europe', 'For', 'Given', 'Goals', 'However', 'In', 'Jeffrey', 'Likewise', 'Looking', 'Luddite', 'Millenium', 'Most', 'Sachs', 'Solutions', 'South', 'Surely', 'Technology', 'The', 'There', 'This', 'UN', 'Upon', 'Using', 'WebMD', 'With', 'a', 'ability', 'achieved', 'actually', 'additional', 'advent', 'all', 'allowed', 'allowing', 'allows', 'alternate', 'always', 'an', 'and', 'architects', 'are', 'argue', 'arrive', 'as', 'assumption', 'at', 'attempts', 'attitude', 'automobile', 'automobiles', 'avenues', 'back', 'based', 'battle', 'be', 'becomes', 'been', 'both', 'bridge', 'bridges', 'buggy', 'but', 'by', 'can', 'car', 'care', 'century', 'chances', 'changed', 'changes', 'circuits', 'closed', 'combustion', 'common', 'communication', 'commute', 'complete', 'complex', 'computer', 'concern', 
'conservation', 'contrast', 'convenience', 'corners', 'could', 'create', 'created', 'creatively', 'creativity', 'dared', 'day', 'days', 'demands', 'dependence', 'dependent', 'did', 'digital', 'discovery', 'disease', 'doctor', 'does', 'dramatic', 'dramatically', 'drawn', 'earth', 'economics', 'efficiency', 'efficiently', 'elimination', 'embrace', 'emergency', 'emerging', 'employee', 'energy', 'engine', 'environments', 'eradicated', 'even', 'example', 'examples', 'exchange', 'exercise', 'exist', 'experience', 'fact', 'family', 'find', 'fire', 'for', 'free', 'frees', 'from', 'fuel', 'future', 'generation', 'global', 'goals', 'groups', 'had', 'has', 'have', 'healed', 'high', 'his', 'history', 'home', 'hope', 'hopeful', 'horse', 'how', 'human', 'humanity', 'humans', 'hyperinflation', 'hypothetical', 'ideas', 'imagination', 'imaginations', 'imagine', 'implementation', 'impossible', 'in', 'inconceivable', 'increased', 'increasing', 'information', 'informed', 'inspiration', 'interact', 'interactions', 'interdisciplinary', 'internal', 'internet', 'introduced', 'introduction', 'is', 'issues', 'itself', 'last', 'late', 'layman', 'leaving', 'limit', 'linking', 'live', 'lived', 'lives', 'making', 'mark', 'markets', 'marks', 'marriage', 'maverick', 'may', 'medical', 'medicine', 'members', 'methods', 'microns', 'minority', 'mobile', 'more', 'most', 'nanotechnology', 'nation', 'necessarily', 'need', 'negates', 'negatively', 'networks', 'new', 'no', 'not', 'now', 'number', 'obsolete', 'occurrences', 'of', 'off', 'oil', 'old', 'on', 'one', 'only', 'opens', 'oppress', 'or', 'orbiting', 'our', 'out', 'over', 'parties', 'past', 'pathways', 'patients', 'people', 'permits', 'phone', 'plans', 'plays', 'politicians', 'popular', 'portal', 'possibilities', 'posture', 'preclude', 'prehistorical', 'previous', 'previously', 'prior', 'problems', 'processes', 'processing', 'proliferation', 'provides', 'quandaries', 'quick', 'race', 'rather', 'ravaged', 'reached', 'recent', 'reduction', 
'reflection', 'release', 'reliance', 'require', 'research', 'retreat', 'reveal', 'reveals', 'revolution', 'revolutionized', 'ruling', 'runs', 'satellites', 'scale', 'scientists', 'see', 'self', 'short', 'silicon', 'since', 'smallpox', 'solve', 'species', 'statement', 'states', 'such', 'surprising', 'symptoms', 'systematically', 'tackle', 'targeted', 'taxation', 'techniques', 'technology', 'tense', 'that', 'the', 'themselves', 'there', 'these', 'things', 'think', 'thinking', 'this', 'through', 'time', 'to', 'transport', 'triage', 'turn', 'typical', 'unfettered', 'unlikely', 'unprecedented', 'us', 'utilize', 'vaccines', 'visit', 'warming', 'was', 'we', 'were', 'where', 'wide', 'will', 'wireless', 'with', 'without', 'witnessed', 'women', 'work', 'workday', 'worker', 'world', 'would', 'yet'] + self.assertEqual(doc._.word_types,word_types) + + def test_morphroot(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + morphroot = ['the', 'state', 'link', 'technology', 'negative', 'with', 'free', 'think', 'play', 'on', 'recent', 'human', 'experience', 'over', 'the', 'past', 'century', None, 'sure', 'there', 'have', 'be', 'no', 'time', 'in', 'history', 'where', 'the', 'live', 'live', 'of', 'people', 'have', 'change', 'more', 'drama', None, 'a', 'quick', 'reflect', 'on', 'a', 'typical', 'day', 'reveal', 'how', 'technology', 'have', 'revolution', 'the', 'world', None, 'most', 'people', 'commute', 'to', 'work', 'in', 'a', 'automobile', 'that', 'run', 'on', 'a', 'internal', 'combustion', 'engine', None, 'during', 'the', 'workday', None, 'chance', 'be', 'high', 'that', 'the', 'employ', 'will', 'interact', 'with', 'a', 'computer', 'that', 'process', 'inform', 'on', 'silicon', 'bridge', 'that', 'be', None, 'micron', 'wide', None, 'upon', 'leave', 'home', None, 'family', 'member', 'will', 'be', 'reach', 'through', 'wireless', 'network', 'that', 'utilise', 'satellite', 'orbit', 'the', 'earth', None, 'each', 'of', 'this', 'common', 'occur', 'could', 'have', 'be', 
'conceive', 'at', 'the', 'turn', 'of', 'the', '19th', 'century', None, None, 'the', 'state', 'attempt', 'to', 'bridge', 'this', 'drama', 'change', 'to', 'a', 'reduce', 'in', 'the', 'able', 'for', 'human', 'to', 'think', 'for', 'they', None, 'the', 'assumption', 'be', 'that', 'a', 'increase', 'rely', 'on', 'technology', 'negate', 'the', 'need', 'for', 'people', 'to', 'think', 'create', 'to', 'solve', 'previous', 'quandary', None, 'look', 'back', 'at', 'the', 'introduce', None, 'one', 'could', 'argue', 'that', 'without', 'a', 'car', None, 'computer', None, 'or', 'mobile', 'telephone', None, 'the', 'hypothetical', 'work', 'would', 'need', 'to', 'find', 'alternate', 'method', 'of', 'transport', None, 'inform', 'process', 'and', 'communicate', None, 'technology', 'short', 'circuit', 'this', 'think', 'by', 'make', 'the', 'problem', 'obsolete', None, None, 'however', None, 'this', 'rely', 'on', 'technology', 'do', 'not', 'necessary', 'preclude', 'the', 'create', 'that', 'mark', 'the', 'human', 'species', None, 'the', 'prior', 'example', 'reveal', 'that', 'technology', 'allow', 'for', 'convenience', None, 'the', 'car', None, 'computer', 'and', 'telephone', 'all', 'release', 'add', 'time', 'for', 'people', 'to', 'live', 'more', 'efficient', None, 'this', 'efficient', 'do', 'not', 'preclude', 'the', 'need', 'for', 'human', 'to', 'think', 'for', 'they', None, 'in', 'fact', None, 'technology', 'free', 'human', 'to', 'not', 'only', 'tackle', 'new', 'problem', None, 'but', 'may', 'it', 'create', 'new', 'issue', 'that', 'do', 'not', 'exist', 'without', 'technology', None, 'for', 'example', None, 'the', 'proliferate', 'of', 'automobile', 'have', 'introduce', 'a', 'need', 'for', 'fuel', 'conserve', 'on', 'a', 'global', 'scale', None, 'with', 'increase', 'energy', 'demand', 'from', 'emerge', 'market', None, 'global', 'warm', 'become', 'a', 'concern', 'conceive', 'to', 'the', 'horse', None, 'and', None, 'buggy', 'generation', None, 'likewise', 'dependence', 'on', 'oil', 'have', 
'create', 'nation', None, 'states', 'that', 'be', 'not', 'depend', 'on', 'tax', None, 'allow', 'rule', 'party', 'to', 'oppress', 'minor', 'group', 'such', 'as', 'woman', None, 'solve', 'to', 'this', 'complex', 'problem', 'require', 'the', 'fetter', 'imagine', 'of', 'maverick', 'science', 'and', 'politics', None, None, 'in', 'contrast', 'to', 'the', 'state', None, 'we', 'can', 'even', 'see', 'how', 'technology', 'free', 'the', 'human', 'imagine', None, 'consider', 'how', 'the', 'digital', 'revolution', 'and', 'the', 'advent', 'of', 'the', 'internet', 'have', 'allow', 'for', 'a', 'precede', 'exchange', 'of', 'idea', None, 'WebMD', None, 'a', 'popular', 'internet', 'portal', 'for', 'medical', 'inform', None, 'permit', 'patients', 'to', 'self', 'research', 'symptom', 'for', 'a', 'more', 'inform', 'doctor', 'visit', None, 'this', 'exercise', 'open', 'pathway', 'of', 'think', 'that', 'be', 'previous', 'closed', 'off', 'to', 'the', 'medical', 'layman', None, 'with', 'increase', 'discipline', 'interact', None, 'inspire', 'can', 'arrive', 'from', 'the', 'most', 'surprise', 'corner', None, 'Jeffrey', 'Sachs', None, 'one', 'of', 'the', 'architect', 'of', 'the', 'un', 'millennium', 'develop', 'goal', None, 'base', 'he', 'idea', 'on', 'emergency', 'care', 'triage', 'technique', None, 'the', 'likely', 'marry', 'of', 'economy', 'and', 'medicine', 'have', 'heal', 'tense', None, 'inflate', 'environment', 'from', 'south', 'America', 'to', 'eastern', 'Europe', None, None, 'this', 'last', 'example', 'provide', 'the', 'most', 'hope', 'in', 'how', 'technology', 'actual', 'provide', 'hope', 'to', 'the', 'future', 'of', 'human', None, 'by', 'increase', 'we', 'rely', 'on', 'technology', None, 'possible', 'goal', 'can', 'now', 'be', 'achieve', None, 'consider', 'how', 'the', 'late', '20th', 'century', 'witness', 'the', 'complete', 'eliminate', 'of', 'smallpox', None, 'this', 'disease', 'have', 'ravage', 'the', 'human', 'race', 'since', 'prehistorical', 'day', None, 'and', 'yet', 'with', 
'the', 'technology', 'of', 'vaccine', None, 'free', 'think', 'human', 'dare', 'to', 'imagine', 'a', 'world', 'free', 'of', 'smallpox', None, 'use', 'technology', None, 'battle', 'plan', 'be', 'draw', 'out', None, 'and', 'smallpox', 'be', 'system', 'target', 'and', 'eradicate', None, None, 'technology', 'will', 'always', 'mark', 'the', 'human', 'experience', None, 'from', 'the', 'discover', 'of', 'fire', 'to', 'the', 'implement', 'of', 'nanotechnology', None, 'give', 'the', 'history', 'of', 'the', 'human', 'race', None, 'there', 'will', 'be', 'no', 'limit', 'to', 'the', 'number', 'of', 'problem', None, 'both', 'new', 'and', 'old', None, 'for', 'we', 'to', 'tackle', None, 'there', 'be', 'no', 'need', 'to', 'retreat', 'to', 'a', 'luddite', 'attitude', 'to', 'new', 'thing', None, 'but', 'rather', 'embrace', 'a', 'hope', 'posture', 'to', 'the', 'possible', 'that', 'technology', 'provide', 'for', 'new', 'avenue', 'of', 'human', 'imagine', None] + self.assertEqual(doc._.morphroot,morphroot) + + def test_wf_type_count(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.wf_type_count,224) + + def test_lemma_type_count(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.lemma_type_count,252) + + + def test_type_count(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.type_count,265) + + def test_token_count(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.token_count,345) + + def test_syllable_counts(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + syllable_counts = [1, 2, 2, 4, 4, 1, 1, 2, 1, 1, 2, 2, 4, 2, 1, 1, 3, None, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 2, 1, 2, 1, 5, None, 1, 1, 3, 1, 1, 3, 1, 2, 1, 4, 1, 5, 1, 1, None, 1, 2, 2, 1, 1, 1, 1, 4, 1, 1, 1, 1, 3, 3, 2, None, 2, 1, 2, None, 2, 1, 1, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 4, 1, 3, 2, 1, 1, None, 2, 1, None, 2, 2, 1, None, 3, 2, 1, 1, 2, 1, 
2, 2, 1, 3, 4, 3, 1, 1, None, 1, 1, 1, 2, 4, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 3, None, None, 1, 2, 2, 1, 1, 1, 3, 2, 1, 1, 3, 1, 1, 4, 1, 2, 1, 1, 1, 2, None, 1, 3, 1, 1, 1, 2, 3, 1, 4, 3, 1, 1, 1, 2, 1, 1, 4, 1, 1, 3, 3, None, 2, 1, 1, 1, 4, None, 1, 1, 2, 1, 2, 1, 1, None, 3, None, 1, 2, 1, None, 1, 5, 2, 1, 1, 1, 1, 3, 2, 1, 2, None, 4, 3, 1, 5, None, 4, 1, 2, 1, 2, 1, 2, 1, 2, 3, None, None, 3, None, 1, 3, 1, 4, 1, 1, 5, 2, 1, 5, 1, 1, 1, 2, 2, None, 1, 2, 3, 2, 1, 4, 2, 1, 4, None, 1, 1, None, 3, 1, 1, 1, 2, 4, 1, 1, 2, 1, 1, 1, 4, None, 1, 4, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, None, 1, 1, None, 4, 1, 4, 1, 1, 2, 2, 1, 2, None, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 4, None, 1, 3, None, 1, 5, 1, 5, 1, 3, 1, 1, 1, 2, 4, 1, 1, 2, 1, None, 1, 3, 3, 2, 1, 3, 2, None, 2, 2, 3, 1, 2, 5, 1, 1, 1, None, 1, None, 2, 4, None, 2, 3, 1, 1, 1, 3, 2, None, 1, 1, 1, 1, 3, 1, 3, None, 3, 2, 2, 1, 2, 4, 1, 1, 1, 2, None, 3, 1, 1, 2, 2, 2, 1, 4, 5, 1, 3, 2, 1, 4, None, None, 1, 2, 1, 1, 2, None, 1, 1, 2, 1, 1, 4, 1, 1, 2, 5, None, 3, 1, 1, 3, 4, 1, 1, 2, 1, 1, 3, 1, 2, 1, 1, 5, 2, 1, 2, None, 1, None, 1, 3, 3, 2, 1, 3, 4, None, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, None, 1, 3, 2, 2, 1, 2, 1, 1, 3, 1, 1, 1, 1, 3, 2, None, 1, 2, 7, 4, None, 4, 1, 2, 1, 1, 1, 3, 2, None, 2, 1, None, 1, 1, 1, 3, 1, 1, 1, 3, 4, 1, None, 2, 1, 2, 1, 4, 1, 2, 3, None, 1, 3, 2, 1, 4, 1, 3, 1, 2, 1, None, 5, 4, 1, 1, 4, 1, 2, 2, None, None, 1, 1, 3, 3, 1, 1, 1, 1, 1, 4, 4, 3, 1, 1, 1, 2, 1, 4, None, 1, 3, 1, 3, 1, 4, None, 4, 1, 1, 1, 1, 2, None, 3, 1, 1, 1, 1, 3, 3, 1, 2, 5, 1, 2, None, 1, 2, 1, 2, 1, 2, 1, 1, 5, 1, None, 1, 1, 1, 1, 4, 1, 3, None, 1, 2, 2, 2, 1, 3, 1, 1, 1, 1, 2, None, 2, 4, None, 2, 1, 1, 1, 1, None, 1, 2, 1, 6, 3, 1, 5, None, None, 4, 1, 2, 1, 1, 2, 4, None, 1, 1, 4, 1, 1, 1, 1, 5, 1, 6, None, 2, 1, 3, 1, 1, 2, 1, None, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, None, 1, 1, 1, 1, None, 1, 1, 1, 2, None, 1, 1, 1, 1, 1, 2, 1, 1, 2, 3, 1, 1, 1, None, 1, 2, 2, 1, 2, 2, 1, 1, 5, 1, 4, 3, 1, 1, 3, 1, 2, 5, None] + 
nSyllables = doc._.nSyllables + self.assertEqual(syllable_counts,nSyllables) + + def test_mean_nSylls(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + nSylls = 2.4840579710144928 + self.assertEqual(doc._.mean_nSyll,nSylls) + + def test_med_nSylls(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_nSyll,2.0) + + def test_max_nSylls(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_nSyll,7.0) + + def test_min_nSylls(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_nSyll,1.0) + + def test_std_nSylls(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_nSyll,1.2033894945940653) + + def test_sqrtNChars(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + sqrtNChars = [1.7320508075688772, 3.0, 2.6457513110645907, 3.1622776601683795, 3.1622776601683795, 2.0, 2.0, 2.8284271247461903, 2.23606797749979, 1.4142135623730951, 2.449489742783178, 2.23606797749979, 3.1622776601683795, 2.0, 1.7320508075688772, 2.0, 2.6457513110645907, 1.0, 2.449489742783178, 2.23606797749979, 1.7320508075688772, 2.0, 1.4142135623730951, 2.0, 1.4142135623730951, 2.6457513110645907, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 2.23606797749979, 1.4142135623730951, 2.449489742783178, 2.0, 2.6457513110645907, 2.0, 3.4641016151377544, 1.0, 1.0, 2.23606797749979, 3.1622776601683795, 1.4142135623730951, 1.0, 2.6457513110645907, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 3.1622776601683795, 1.7320508075688772, 3.7416573867739413, 1.7320508075688772, 2.23606797749979, 1.0, 2.0, 2.449489742783178, 2.6457513110645907, 1.4142135623730951, 2.0, 1.4142135623730951, 1.4142135623730951, 3.1622776601683795, 2.0, 2.0, 1.4142135623730951, 1.4142135623730951, 2.8284271247461903, 3.1622776601683795, 2.449489742783178, 1.0, 2.449489742783178, 1.7320508075688772, 2.6457513110645907, 1.0, 
2.6457513110645907, 1.7320508075688772, 2.0, 2.0, 1.7320508075688772, 2.8284271247461903, 2.0, 2.8284271247461903, 2.0, 1.0, 2.8284271247461903, 2.0, 3.0, 3.3166247903554, 1.4142135623730951, 2.6457513110645907, 2.6457513110645907, 2.0, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 2.0, 1.0, 2.0, 2.6457513110645907, 2.0, 1.0, 2.449489742783178, 2.6457513110645907, 2.0, 1.4142135623730951, 2.6457513110645907, 2.6457513110645907, 2.8284271247461903, 2.8284271247461903, 2.0, 2.6457513110645907, 3.1622776601683795, 2.8284271247461903, 1.7320508075688772, 2.23606797749979, 1.0, 2.0, 1.4142135623730951, 2.23606797749979, 2.449489742783178, 3.3166247903554, 2.23606797749979, 2.0, 2.0, 3.605551275463989, 1.4142135623730951, 1.7320508075688772, 2.0, 1.4142135623730951, 1.7320508075688772, 2.0, 2.6457513110645907, 1.0, 1.4142135623730951, 1.7320508075688772, 3.0, 2.8284271247461903, 1.4142135623730951, 2.449489742783178, 2.23606797749979, 2.8284271247461903, 2.6457513110645907, 1.4142135623730951, 1.0, 3.0, 1.4142135623730951, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.23606797749979, 1.7320508075688772, 3.1622776601683795, 1.0, 1.7320508075688772, 3.1622776601683795, 1.4142135623730951, 2.0, 1.4142135623730951, 3.0, 2.8284271247461903, 1.4142135623730951, 3.1622776601683795, 2.6457513110645907, 1.7320508075688772, 2.0, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.23606797749979, 3.1622776601683795, 1.4142135623730951, 2.23606797749979, 2.8284271247461903, 3.1622776601683795, 1.0, 2.6457513110645907, 2.0, 1.4142135623730951, 1.7320508075688772, 3.4641016151377544, 1.0, 1.7320508075688772, 2.23606797749979, 2.23606797749979, 2.0, 2.6457513110645907, 1.0, 1.7320508075688772, 1.0, 2.8284271247461903, 1.0, 1.4142135623730951, 2.449489742783178, 2.23606797749979, 1.0, 1.7320508075688772, 3.4641016151377544, 2.449489742783178, 2.23606797749979, 2.0, 1.4142135623730951, 2.0, 3.0, 
2.6457513110645907, 1.4142135623730951, 3.0, 1.0, 3.3166247903554, 3.1622776601683795, 1.7320508075688772, 3.605551275463989, 1.0, 3.1622776601683795, 2.23606797749979, 2.8284271247461903, 2.0, 2.8284271247461903, 1.4142135623730951, 2.449489742783178, 1.7320508075688772, 2.8284271247461903, 2.8284271247461903, 1.0, 1.4142135623730951, 2.6457513110645907, 1.0, 2.0, 2.8284271247461903, 1.4142135623730951, 3.1622776601683795, 2.0, 1.7320508075688772, 3.3166247903554, 2.8284271247461903, 1.7320508075688772, 3.1622776601683795, 2.0, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 2.6457513110645907, 1.0, 1.7320508075688772, 2.23606797749979, 2.8284271247461903, 2.449489742783178, 2.0, 3.1622776601683795, 2.449489742783178, 1.7320508075688772, 3.3166247903554, 1.0, 1.7320508075688772, 1.7320508075688772, 1.0, 2.8284271247461903, 1.7320508075688772, 2.23606797749979, 1.7320508075688772, 2.6457513110645907, 3.1622776601683795, 2.0, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.0, 2.0, 3.3166247903554, 1.0, 2.0, 3.1622776601683795, 2.0, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 2.0, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.23606797749979, 1.7320508075688772, 3.1622776601683795, 1.0, 1.4142135623730951, 2.0, 1.0, 3.1622776601683795, 2.23606797749979, 2.8284271247461903, 1.4142135623730951, 1.7320508075688772, 2.0, 2.449489742783178, 1.7320508075688772, 2.8284271247461903, 1.0, 1.7320508075688772, 1.7320508075688772, 2.449489742783178, 2.449489742783178, 1.7320508075688772, 2.449489742783178, 2.0, 1.7320508075688772, 1.7320508075688772, 2.23606797749979, 2.6457513110645907, 3.1622776601683795, 1.0, 1.7320508075688772, 2.6457513110645907, 1.0, 1.7320508075688772, 3.605551275463989, 1.4142135623730951, 3.3166247903554, 1.7320508075688772, 3.1622776601683795, 1.0, 2.0, 1.7320508075688772, 2.0, 3.4641016151377544, 1.4142135623730951, 1.0, 2.449489742783178, 2.23606797749979, 1.0, 2.0, 3.1622776601683795, 
2.449489742783178, 2.6457513110645907, 2.0, 2.8284271247461903, 2.6457513110645907, 1.0, 2.449489742783178, 2.6457513110645907, 2.6457513110645907, 1.0, 2.6457513110645907, 3.605551275463989, 1.4142135623730951, 1.7320508075688772, 2.23606797749979, 1.0, 1.7320508075688772, 1.0, 2.23606797749979, 3.1622776601683795, 1.0, 2.8284271247461903, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 2.449489742783178, 1.0, 2.449489742783178, 2.0, 1.7320508075688772, 1.7320508075688772, 3.0, 1.4142135623730951, 2.8284271247461903, 1.0, 2.8284271247461903, 2.449489742783178, 2.6457513110645907, 1.4142135623730951, 2.6457513110645907, 2.8284271247461903, 2.449489742783178, 2.0, 1.4142135623730951, 2.23606797749979, 1.0, 3.0, 1.4142135623730951, 2.23606797749979, 2.6457513110645907, 2.8284271247461903, 2.6457513110645907, 1.7320508075688772, 3.1622776601683795, 3.4641016151377544, 1.4142135623730951, 2.8284271247461903, 3.1622776601683795, 1.7320508075688772, 3.3166247903554, 1.0, 1.4142135623730951, 1.4142135623730951, 2.8284271247461903, 1.4142135623730951, 1.7320508075688772, 3.0, 1.0, 1.4142135623730951, 1.7320508075688772, 2.0, 1.7320508075688772, 1.7320508075688772, 3.1622776601683795, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 3.3166247903554, 1.0, 2.8284271247461903, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 3.1622776601683795, 1.7320508075688772, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 1.4142135623730951, 3.605551275463989, 2.8284271247461903, 1.4142135623730951, 2.23606797749979, 1.0, 2.23606797749979, 1.0, 1.0, 2.6457513110645907, 2.8284271247461903, 2.449489742783178, 1.7320508075688772, 2.6457513110645907, 3.3166247903554, 1.0, 2.6457513110645907, 2.8284271247461903, 1.4142135623730951, 2.0, 2.8284271247461903, 2.8284271247461903, 1.7320508075688772, 1.0, 2.0, 
2.8284271247461903, 2.449489742783178, 2.23606797749979, 1.0, 2.0, 2.8284271247461903, 2.23606797749979, 2.8284271247461903, 1.4142135623730951, 2.8284271247461903, 2.0, 2.0, 3.1622776601683795, 2.449489742783178, 1.7320508075688772, 1.4142135623730951, 1.7320508075688772, 2.6457513110645907, 2.449489742783178, 1.0, 2.0, 3.0, 4.123105625617661, 3.4641016151377544, 1.0, 3.3166247903554, 1.7320508075688772, 2.449489742783178, 2.0, 1.7320508075688772, 2.0, 3.1622776601683795, 2.6457513110645907, 1.0, 2.6457513110645907, 2.23606797749979, 1.0, 1.7320508075688772, 1.4142135623730951, 1.7320508075688772, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772, 1.4142135623730951, 3.0, 3.3166247903554, 2.23606797749979, 1.0, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 1.4142135623730951, 3.0, 2.0, 2.449489742783178, 3.1622776601683795, 1.0, 1.7320508075688772, 2.8284271247461903, 2.8284271247461903, 1.4142135623730951, 3.0, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 2.449489742783178, 2.23606797749979, 1.0, 3.7416573867739413, 3.4641016151377544, 2.0, 2.23606797749979, 2.6457513110645907, 1.4142135623730951, 2.6457513110645907, 2.449489742783178, 1.0, 1.4142135623730951, 2.0, 2.0, 2.6457513110645907, 2.8284271247461903, 1.7320508075688772, 2.0, 2.0, 1.4142135623730951, 1.7320508075688772, 3.1622776601683795, 2.8284271247461903, 2.8284271247461903, 2.0, 1.4142135623730951, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.8284271247461903, 1.0, 1.4142135623730951, 3.1622776601683795, 1.7320508075688772, 2.8284271247461903, 1.4142135623730951, 3.1622776601683795, 1.0, 3.1622776601683795, 2.23606797749979, 1.7320508075688772, 1.7320508075688772, 1.4142135623730951, 2.8284271247461903, 1.0, 2.8284271247461903, 1.7320508075688772, 1.7320508075688772, 2.0, 2.0, 2.6457513110645907, 3.0, 1.7320508075688772, 2.8284271247461903, 3.3166247903554, 1.4142135623730951, 2.8284271247461903, 1.0, 2.0, 2.6457513110645907, 1.7320508075688772, 
2.6457513110645907, 1.7320508075688772, 2.23606797749979, 2.0, 2.23606797749979, 3.605551275463989, 2.0, 1.0, 1.7320508075688772, 1.7320508075688772, 2.0, 1.7320508075688772, 3.1622776601683795, 1.4142135623730951, 2.8284271247461903, 1.0, 2.0, 2.8284271247461903, 2.449489742783178, 2.23606797749979, 1.4142135623730951, 2.6457513110645907, 1.0, 2.23606797749979, 2.0, 1.4142135623730951, 2.8284271247461903, 1.0, 2.23606797749979, 3.1622776601683795, 1.0, 2.449489742783178, 2.23606797749979, 2.0, 2.23606797749979, 1.7320508075688772, 1.0, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 3.7416573867739413, 2.8284271247461903, 1.7320508075688772, 3.1622776601683795, 1.0, 1.4142135623730951, 3.1622776601683795, 2.0, 2.449489742783178, 2.0, 1.7320508075688772, 2.23606797749979, 3.1622776601683795, 1.0, 2.0, 1.7320508075688772, 3.0, 1.4142135623730951, 2.0, 1.4142135623730951, 1.7320508075688772, 3.7416573867739413, 1.4142135623730951, 3.7416573867739413, 1.0, 2.23606797749979, 1.7320508075688772, 2.6457513110645907, 1.4142135623730951, 1.7320508075688772, 2.23606797749979, 2.0, 1.0, 2.23606797749979, 2.0, 1.4142135623730951, 1.4142135623730951, 2.23606797749979, 1.4142135623730951, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.8284271247461903, 1.0, 2.0, 1.7320508075688772, 1.7320508075688772, 1.7320508075688772, 1.0, 1.7320508075688772, 1.4142135623730951, 1.4142135623730951, 2.449489742783178, 1.0, 2.23606797749979, 1.4142135623730951, 1.4142135623730951, 2.0, 1.4142135623730951, 2.6457513110645907, 1.4142135623730951, 1.0, 2.6457513110645907, 2.8284271247461903, 1.4142135623730951, 1.7320508075688772, 2.449489742783178, 1.0, 1.7320508075688772, 2.449489742783178, 2.6457513110645907, 1.0, 2.6457513110645907, 2.6457513110645907, 1.4142135623730951, 1.7320508075688772, 3.605551275463989, 2.0, 3.1622776601683795, 2.8284271247461903, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 1.4142135623730951, 2.23606797749979, 3.3166247903554, 
1.0] + self.assertEqual(doc._.sqrtNChars,sqrtNChars) + + def test_mean_sqrtNChars(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + sqrtNChars = 2.671858311020255 + self.assertEqual(doc._.mean_sqnChars,sqrtNChars) + + def test_med_sqrtNChars(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_sqnChars,2.6457513110645907) + + def test_max_sqrtNChars(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_sqnChars,4.123105625617661) + + def test_min_sqrtNChars(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_sqnChars,1.4142135623730951) + + def test_std_sqrtNChars(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_sqnChars,0.4641344226022158) + + def test_latinates(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + latinates = [None, 1, 0, 0, 1, None, 0, 0, 0, None, 0, 0, 0, None, None, 0, 0, None, 0, None, None, None, None, 0, None, 0, None, None, 0, 0, None, 0, None, 0, None, 1, None, None, 0, 1, None, None, 1, 0, 0, None, 0, None, 1, None, 0, None, None, 0, 1, None, 0, None, None, 1, None, 0, None, None, 1, 1, 0, None, None, None, 0, None, 0, None, 0, None, None, 1, None, 1, None, None, 0, None, 1, 1, None, 0, 0, None, None, None, 0, 0, None, 0, 0, 0, None, 0, 0, None, None, 0, None, 0, 0, None, 1, 0, 0, None, 0, None, None, None, None, 0, 1, 0, None, None, 1, None, None, 0, None, None, None, 0, None, None, None, 1, 0, None, 0, None, 1, 0, None, None, 1, None, None, 1, None, 0, None, 0, None, None, None, None, 0, None, None, None, 1, 1, None, 0, 1, None, 0, None, 0, None, 0, 1, None, 0, 0, 0, None, 0, 0, None, None, 1, None, 0, 0, 0, None, 0, None, 0, None, 0, None, None, 0, 0, None, None, 1, 0, 0, 0, None, 0, 1, 0, None, 1, None, 1, 1, None, 1, None, 0, 0, 0, None, 0, None, 0, None, 0, 0, None, None, 0, None, None, 1, None, 0, None, None, 1, 1, None, 1, None, 0, None, 0, 0, 
None, None, 0, 0, 0, None, 0, 0, None, 0, None, None, 0, None, 0, None, 0, None, 0, 1, 0, None, 0, None, 0, None, 0, None, None, 0, None, None, 1, None, 0, None, 0, None, 0, None, None, None, None, 0, None, 0, 0, 1, None, None, None, 0, 0, 0, None, None, 0, None, 0, 0, 0, None, None, None, 0, 0, 0, None, None, 0, None, None, 1, None, 1, None, 1, None, 0, None, 0, 1, None, None, 1, 0, None, None, 1, 0, 1, None, 1, 0, None, 1, 0, 0, None, 0, 1, None, None, 0, None, None, None, 0, 1, None, 0, 1, None, 0, None, 0, 0, None, 0, None, None, None, 1, None, 1, None, 0, 0, 0, None, 0, 1, 0, None, None, 0, None, 1, None, None, 1, 0, 1, None, 0, 1, None, 0, 1, None, 1, None, None, None, 0, None, None, 1, None, None, None, 0, 0, None, 0, 0, None, 0, 1, None, 0, None, None, 1, 1, None, None, 0, None, None, None, None, 0, None, None, 0, 1, None, 0, None, None, None, None, 1, None, 0, None, 1, 1, None, 0, 0, None, 0, 1, 0, None, None, None, 0, 0, 0, None, None, 0, 0, 0, None, 0, None, None, 0, 0, None, None, None, 1, 0, None, None, 1, 1, 1, None, 1, None, 0, None, None, None, 0, 0, None, None, None, None, 0, None, None, 0, None, None, None, 1, 1, 0, None, 0, None, 0, None, 0, 0, None, 0, None, None, 0, 0, None, 1, None, 0, None, 0, 0, None, 1, 1, None, 0, 0, None, 0, 0, None, None, None, 0, 0, 1, None, None, 0, None, None, 0, 1, 1, 0, None, None, 0, None, 1, None, None, 1, None, 1, None, 0, None, 1, 0, None, None, None, 0, None, 0, None, None, 0, None, 0, 0, None, 0, 1, None, 0, None, None, 0, None, 0, None, 0, 0, 0, 1, 0, None, None, 0, None, None, 0, None, 0, None, 0, 0, 0, 0, None, 0, None, 0, 0, None, 0, None, 0, 0, None, 0, 0, None, 0, None, None, None, 0, None, 1, 0, None, 1, None, None, 0, None, 0, 0, None, 0, 0, None, None, None, 0, None, 0, None, None, 1, None, None, None, 0, None, 0, None, None, 0, 0, None, None, None, None, None, 0, None, None, 0, None, 0, None, None, 0, None, 0, None, None, None, None, 0, None, None, None, None, 0, None, 0, None, None, 1, 0, None, 0, 
0, None, None, 0, 0, None, 0, 0, None, None, 1, None, 0, 1, None, 0, 0, None, 0, 1, None] + + self.assertEqual(doc._.latinates,latinates) + + def test_propn_latinate(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.propn_latinate,0.2835820895522388) + + def test_academics(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + academic = [0, 1, 1, 1, 1, 0, 0, 0, 0, None, 1, 0, 1, 0, 0, 0, 0, None, 0, 0, 0, 0, None, 0, None, 0, 0, 0, 0, 0, None, 0, 0, 0, 0, 1, None, None, 0, 1, None, None, 1, 0, 1, 0, 1, 0, 1, 0, 0, None, 0, 0, 0, None, 0, None, None, 0, 0, 0, None, None, 1, 0, 0, None, 0, 0, 0, None, 0, 0, 0, 0, 0, 1, 0, 1, 0, None, 1, 0, 1, 1, None, 0, 0, 0, 0, None, 0, 0, None, 0, 0, 0, None, 0, 1, 0, None, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, None, 0, None, 0, 0, 1, 0, 0, 0, 1, None, 0, 0, None, 0, 0, 0, None, None, 0, 1, 1, None, 0, 0, 1, 0, None, None, 1, None, 0, 1, 0, 0, None, 0, 0, 0, None, 0, 1, None, 0, None, 1, 1, None, 1, 1, 0, 0, 0, 0, None, 0, 1, None, 1, 1, 0, None, 0, 0, None, 0, 1, None, 0, 0, 0, 0, 0, None, 0, None, 1, None, None, 1, 0, None, 0, 1, 0, 0, 0, None, 0, 1, 1, None, 1, None, 1, 1, 0, 1, None, 1, 0, 1, 0, 0, None, 0, 0, 1, 1, None, None, 1, None, 0, 1, None, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, None, 0, 1, 1, 1, 0, 1, 0, 0, 0, None, 0, 0, None, 1, 0, 0, 0, 1, 1, 0, 0, 0, None, 0, 0, 1, None, 0, 1, 0, 0, 0, 0, 0, 0, 0, None, 0, 0, 0, None, None, 0, None, 1, 0, 0, None, 0, 0, 0, 0, 1, None, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, None, 0, 1, None, 0, 0, None, 0, 0, 1, None, 0, 0, 1, 1, None, None, 1, 1, None, 0, 1, 1, 0, 0, 1, 0, None, 1, 0, 0, None, 1, 1, None, 0, 0, None, 0, None, 0, 1, None, 1, 1, None, 0, 0, 1, 0, None, 1, 0, 0, 0, 1, None, 0, None, 0, 0, 0, None, 0, 1, 0, 0, None, 0, None, 1, None, 0, 1, 1, 1, 0, 0, 0, None, 0, 0, 0, 0, None, None, None, 1, None, 0, 1, None, None, 0, 0, 0, 0, 1, 0, 0, 0, 0, None, 1, 0, 0, 0, 1, 0, 0, 0, None, 0, 0, 0, 0, 0, None, 1, 0, None, 1, None, 0, None, None, 
0, 0, 0, 0, 1, 1, None, 1, 0, None, 0, 1, 1, 0, None, 0, 0, 0, 0, None, 0, 0, 0, 0, None, 0, 0, 0, 1, 0, 0, None, 0, 1, 0, None, 0, 1, 0, 1, None, 0, 0, 0, 0, 0, 0, 1, 0, None, 0, 0, None, 0, None, 0, 1, None, 0, None, 0, 1, 1, None, 0, 0, 1, None, 1, 0, 0, 1, None, 0, 1, 0, None, 1, 0, 0, 0, 0, 1, None, 0, 1, 0, 0, 0, None, 0, 0, None, None, 0, 0, 1, 1, 0, 0, 0, None, 0, 1, 1, 1, 0, None, 0, 1, None, 0, None, None, 1, 0, 1, None, 1, None, 1, 1, 0, 0, None, 1, None, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, None, 0, None, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, None, 0, 0, 0, 0, 1, None, 0, None, 0, 0, 0, 0, None, 0, None, 0, 0, None, 0, None, 0, 1, None, 0, 0, 0, 0, 0, None, 0, 0, 0, 1, 1, 0, 0, None, None, 1, 0, 0, 0, 0, 0, 1, None, 0, 0, 1, None, 0, None, 0, 1, None, 0, None, 0, 0, 0, None, 0, 0, 0, None, 0, 0, None, None, 1, None, 0, 0, None, 1, None, 0, 0, 0, 0, None, 0, None, None, 0, None, 0, None, None, 0, None, 0, None, None, 0, 1, None, 0, 0, None, 0, 0, 1, None, 0, 0, None, 0, 1, 0, 1, 1, 0, 0, 0, None, 0, 0, None] + self.assertEqual(doc._.academics,academic) + + + def test_propn_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.propn_academic,0.4418604651162791) + + def test_family_sizes(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + family_sizes = [1, 7, 13, 7, 5, 2, 11, 22, 16, 2, 2, 13, 8, 2, 1, 2, 2, None, 6, 1, 13, 24, 2, 15, 7, 13, 2, 1, 5, 5, 1, 4, 13, 11, 1, 20, None, 2, 11, 11, 2, 2, 4, 7, 8, 2, 7, 13, 14, 1, 6, None, 2, 4, 6, 1, 15, 7, 2, 4, 3, 14, 2, 2, 12, 2, 3, None, 1, 1, 2, None, 5, 24, 5, 3, 1, 15, 9, 13, 2, 2, 21, 3, 14, 18, 2, 2, 5, 3, 24, None, 2, 11, None, 1, 5, 8, None, 5, 4, 9, 24, 4, 4, 2, 6, 3, 14, 2, 9, 1, 8, None, 1, 1, 2, 8, 10, 2, 13, 24, 8, 1, 1, 6, 1, 1, None, 2, None, None, 1, 7, 5, 1, 5, 2, 20, 11, 1, 2, 10, 7, 1, 8, 2, 13, 1, 22, 2, 8, None, 1, 2, 24, 3, 2, 5, 12, 2, 7, 6, 1, 9, 2, 4, 1, 22, 15, 1, 9, 2, 3, None, 7, 10, 1, 1, 15, None, 5, 2, 14, 3, 1, 2, 2, None, 21, None, 2, 6, 
10, None, 1, 2, 15, 2, 9, 1, 8, 9, 8, 1, 8, None, 18, 14, 2, 11, None, 7, 11, 7, 2, 22, 2, 13, 1, 6, 1, None, None, 1, None, 2, 12, 2, 7, 22, 4, 5, 4, 1, 15, 3, 9, 1, 13, 5, None, 1, 2, 2, 8, 3, 7, 7, 2, 6, None, 1, 2, None, 21, 2, 10, 1, 8, 11, 15, 2, 4, 1, 5, 1, 8, None, 2, 8, 22, 4, 4, 1, 9, 2, 13, 1, 22, 2, 8, None, 7, 3, None, 7, 11, 13, 1, 4, 1, 4, 14, 6, None, 2, 1, 3, 15, 14, 7, 3, 22, 4, 8, 1, 7, None, 2, 2, None, 1, 7, 1, 4, 13, 15, 2, 9, 2, 6, 10, 2, 2, 15, 6, None, 2, 5, 17, 5, 1, 6, 11, None, 15, 9, 4, 2, 6, 8, 1, 1, 5, None, 2, None, 2, 4, None, 1, 4, 2, 9, 13, 15, 23, None, 5, 3, 24, 4, 12, 2, 11, None, 7, 10, 4, 1, 11, 4, 13, 1, 1, 11, None, 9, 1, 2, 5, 6, 6, 1, 6, 17, 1, 2, 9, 2, 19, None, None, 7, 5, 1, 1, 7, None, 6, 2, 1, 11, 2, 7, 11, 1, 13, 17, None, 14, 2, 1, 2, 14, 2, 1, 2, 1, 1, 1, 13, 7, 2, 2, 9, 7, 1, 2, None, None, None, 2, 19, 1, 2, 2, 4, 18, None, 4, 1, 1, 12, 6, 4, 2, 2, 1, 18, 7, 15, None, 2, 5, 14, 2, 1, 22, 3, 24, 2, 4, 3, 1, 1, 4, 2, None, 2, 5, 9, 13, None, 9, 2, 6, 1, 1, 2, 8, 4, None, None, None, None, 5, 1, 1, 2, 1, 1, 1, 7, 19, 3, None, 7, 4, 2, 2, 2, 14, 1, 2, None, 1, 6, 21, 1, 19, 2, 4, 13, 7, 10, None, 8, 7, 1, 3, None, 1, 4, None, None, None, 2, 2, 2, 6, 1, 2, 13, 7, 2, 7, 16, 6, 13, 1, 1, 7, 1, 13, None, 2, 5, 6, 12, 2, 7, None, 8, 3, 2, 1, 24, 10, None, 14, 2, 1, 5, None, 2, 5, 1, 12, 8, 1, 2, None, 2, 3, 13, 7, 1, 13, 6, 1, None, 7, None, 2, 1, 2, 1, 7, 1, 2, None, 11, 22, 13, 5, 1, 17, 2, 6, 11, 1, 2, None, 28, 7, None, 6, 7, 24, 12, 11, None, 2, 2, 24, 8, 6, 2, 5, None, None, 7, 9, 2, 9, 1, 13, 8, None, 1, 1, 15, 1, 8, 1, 1, 6, 1, 1, None, 9, 1, 13, 1, 1, 13, 6, None, 1, 9, 24, 2, 10, 1, 1, 11, 1, 6, None, 1, 14, 2, 9, None, 2, 6, 1, 4, None, 1, 24, 2, 9, 1, 4, 1, 2, 2, 3, 1, 14, 5, None, 2, 1, 4, 2, 13, 6, 1, 1, 8, 3, 7, 6, 2, 14, 2, 1, 13, 17, None] + + self.assertEqual(doc._.family_sizes,family_sizes) + + def test_mean_family_size(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + mean_family_size = 
8.439169139465875 + self.assertEqual(doc._.mean_family_size,mean_family_size) + + def test_med_family_size(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_family_size,7.0) + + def test_max_family_size(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_family_size,23.0) + + def test_min_family_size(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_family_size,1.0) + + def test_std_family_size(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_family_size,5.163690473645397) + + def test_sensenums(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + nSenses = [None, 7, 13, 2, 2, None, 22, 15, 52, 5, 3, 4, 8, 7, None, 6, 2, None, 1, 4, 20, 14, 6, 15, 7, 5, None, None, 19, 14, None, 6, 20, 20, 5, 3, None, 7, 8, 8, 5, 7, 3, 10, 3, None, 2, 20, 3, None, 9, None, 5, 6, 6, None, 34, 7, 1, 2, None, 57, 5, 1, 5, 3, 4, None, None, None, 2, None, 9, 14, 18, None, None, 1, 6, 1, None, 7, 2, None, 13, 5, 5, 1, 12, None, 14, None, 1, 11, None, None, 17, 17, None, 8, 5, 6, 14, 13, 7, 5, 6, None, 2, 5, 6, None, 9, None, 2, None, None, 10, 2, None, 20, 14, 1, 2, None, 38, None, None, 1, 2, None, None, None, 7, 4, None, 12, None, 4, 20, None, 7, 3, 7, None, 2, None, 4, None, 14, None, None, None, None, 7, 14, None, 1, 7, 2, 5, 2, 4, None, 7, None, 6, None, 14, 1, None, 3, 3, 2, None, 14, 28, 2, None, 7, None, 9, None, 3, None, None, 7, 5, None, 2, None, 2, 8, 4, None, None, 2, 4, None, 7, None, 18, 10, 2, None, 11, None, 5, 8, None, 3, None, 2, 23, 8, None, 15, 2, 51, None, 3, 1, None, None, 4, None, None, 2, 5, 2, 16, 1, 3, 2, None, 1, None, 30, None, 4, 1, None, None, 2, 6, 3, None, 2, 10, None, 4, None, None, 5, None, 2, None, 4, 3, 22, 1, 15, None, 6, None, 19, 5, 1, None, None, 2, 16, 1, 2, None, 7, None, 4, None, 14, None, None, None, 7, 4, None, 2, 22, 3, None, 1, 9, 8, 12, 3, None, 1, 2, None, 6, 12, 
16, None, 16, 1, 2, None, 2, None, None, 6, None, None, 2, None, 2, 20, 10, 7, 7, None, 5, 3, 5, 7, 2, 18, None, None, 7, 7, 11, None, 5, 9, None, 2, 6, 4, 7, 7, 1, None, None, 6, None, None, None, 3, 7, None, 3, 2, 5, 6, 20, 6, 4, None, 11, None, 14, 1, 7, 5, 3, None, 10, 19, 6, None, 2, 3, 5, 2, 10, 4, None, 5, None, None, 5, 3, 4, None, 1, 3, None, 3, 1, None, 3, None, None, 7, 7, None, None, 7, None, None, 8, 14, 25, None, 2, 22, None, 4, 3, None, 9, None, None, 3, 3, None, None, 3, None, None, 1, 20, 10, None, 1, 1, 17, None, 5, None, None, None, 7, 4, 1, 3, None, 4, 5, None, 6, 3, None, 3, 4, 2, None, 7, 5, 4, 7, 13, None, None, 10, 36, 2, None, 15, None, 14, 1, 37, 9, None, None, 4, 1, None, None, 7, 1, 2, None, 6, 8, 2, None, None, 5, 4, 14, None, None, None, None, 9, None, None, 1, None, None, 1, None, 9, 4, None, 30, None, 5, 5, 3, 11, 1, 2, None, None, 3, 4, None, 5, None, 5, 20, 3, 8, None, None, 2, None, 7, 2, None, 5, 3, None, None, None, 21, 6, 7, None, 5, 9, 7, None, 2, 4, 7, 9, None, None, 7, None, 3, None, 2, 7, None, 2, 5, 2, None, 4, 4, 8, 8, 14, 1, None, 9, None, None, 11, 1, 2, 7, None, 10, 5, None, 1, None, None, 1, 20, 3, None, 4, 10, None, 1, 10, None, None, 6, None, None, 2, None, 1, None, 22, 15, 4, 4, None, 2, 7, 9, 22, None, 1, None, 13, 2, None, 4, 7, 14, 45, 17, None, None, 1, 14, 1, 6, None, 2, None, None, 2, 6, 5, 30, None, 4, 8, None, None, None, 4, None, 18, None, None, 2, None, 1, None, 45, None, 5, None, None, 4, 10, None, 4, 6, 14, 6, 9, None, None, 17, None, 3, None, 1, 12, None, 9, None, None, None, None, 8, None, 4, 14, 6, 7, None, 11, None, 7, 2, 4, None, 12, 12, None, 1, 4, 6, 7, 3, 6, None, None, 4, None, 2, 7, None, 12, 2, None, 4, 3, None] + self.assertEqual(doc._.sensenums,nSenses) + + def test_mean_nSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + mean_nSenses = 7.238235294117647 + self.assertEqual(doc._.mean_nSenses,mean_nSenses) + + def test_med_nSenses(self): + doc = 
holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_nSenses,4.5) + + def test_max_nSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_nSenses,57.0) + + def test_min_nSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_nSenses,1.0) + + def test_std_nSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_nSenses,8.193403876479936) + + def test_log_sensenums(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + logsensenums=[None, 1.9459101490553132, 2.5649493574615367, 0.6931471805599453, 0.6931471805599453, None, 3.091042453358316, 2.70805020110221, 3.9512437185814275, 1.6094379124341003, 1.0986122886681098, 1.3862943611198906, 2.0794415416798357, 1.9459101490553132, None, 1.791759469228055, 0.6931471805599453, None, 0.0, 1.3862943611198906, 2.995732273553991, 2.6390573296152584, 1.791759469228055, 2.70805020110221, 1.9459101490553132, 1.6094379124341003, None, None, 2.9444389791664403, 2.6390573296152584, None, 1.791759469228055, 2.995732273553991, 2.995732273553991, 1.6094379124341003, 1.0986122886681098, None, 1.9459101490553132, 2.0794415416798357, 2.0794415416798357, 1.6094379124341003, 1.9459101490553132, 1.0986122886681098, 2.302585092994046, 1.0986122886681098, None, 0.6931471805599453, 2.995732273553991, 1.0986122886681098, None, 2.1972245773362196, None, 1.6094379124341003, 1.791759469228055, 1.791759469228055, None, 3.5263605246161616, 1.9459101490553132, 0.0, 0.6931471805599453, None, 4.04305126783455, 1.6094379124341003, 0.0, 1.6094379124341003, 1.0986122886681098, 1.3862943611198906, None, None, None, 0.6931471805599453, None, 2.1972245773362196, 2.6390573296152584, 2.8903717578961645, None, None, 0.0, 1.791759469228055, 0.0, None, 1.9459101490553132, 0.6931471805599453, None, 2.5649493574615367, 1.6094379124341003, 1.6094379124341003, 0.0, 2.4849066497880004, None, 
2.6390573296152584, None, 0.0, 2.3978952727983707, None, None, 2.833213344056216, 2.833213344056216, None, 2.0794415416798357, 1.6094379124341003, 1.791759469228055, 2.6390573296152584, 2.5649493574615367, 1.9459101490553132, 1.6094379124341003, 1.791759469228055, None, 0.6931471805599453, 1.6094379124341003, 1.791759469228055, None, 2.1972245773362196, None, 0.6931471805599453, None, None, 2.302585092994046, 0.6931471805599453, None, 2.995732273553991, 2.6390573296152584, 0.0, 0.6931471805599453, None, 3.6375861597263857, None, None, 0.0, 0.6931471805599453, None, None, None, 1.9459101490553132, 1.3862943611198906, None, 2.4849066497880004, None, 1.3862943611198906, 2.995732273553991, None, 1.9459101490553132, 1.0986122886681098, 1.9459101490553132, None, 0.6931471805599453, None, 1.3862943611198906, None, 2.6390573296152584, None, None, None, None, 1.9459101490553132, 2.6390573296152584, None, 0.0, 1.9459101490553132, 0.6931471805599453, 1.6094379124341003, 0.6931471805599453, 1.3862943611198906, None, 1.9459101490553132, None, 1.791759469228055, None, 2.6390573296152584, 0.0, None, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453, None, 2.6390573296152584, 3.332204510175204, 0.6931471805599453, None, 1.9459101490553132, None, 2.1972245773362196, None, 1.0986122886681098, None, None, 1.9459101490553132, 1.6094379124341003, None, 0.6931471805599453, None, 0.6931471805599453, 2.0794415416798357, 1.3862943611198906, None, None, 0.6931471805599453, 1.3862943611198906, None, 1.9459101490553132, None, 2.8903717578961645, 2.302585092994046, 0.6931471805599453, None, 2.3978952727983707, None, 1.6094379124341003, 2.0794415416798357, None, 1.0986122886681098, None, 0.6931471805599453, 3.1354942159291497, 2.0794415416798357, None, 2.70805020110221, 0.6931471805599453, 3.9318256327243257, None, 1.0986122886681098, 0.0, None, None, 1.3862943611198906, None, None, 0.6931471805599453, 1.6094379124341003, 0.6931471805599453, 2.772588722239781, 0.0, 1.0986122886681098, 
0.6931471805599453, None, 0.0, None, 3.4011973816621555, None, 1.3862943611198906, 0.0, None, None, 0.6931471805599453, 1.791759469228055, 1.0986122886681098, None, 0.6931471805599453, 2.302585092994046, None, 1.3862943611198906, None, None, 1.6094379124341003, None, 0.6931471805599453, None, 1.3862943611198906, 1.0986122886681098, 3.091042453358316, 0.0, 2.70805020110221, None, 1.791759469228055, None, 2.9444389791664403, 1.6094379124341003, 0.0, None, None, 0.6931471805599453, 2.772588722239781, 0.0, 0.6931471805599453, None, 1.9459101490553132, None, 1.3862943611198906, None, 2.6390573296152584, None, None, None, 1.9459101490553132, 1.3862943611198906, None, 0.6931471805599453, 3.091042453358316, 1.0986122886681098, None, 0.0, 2.1972245773362196, 2.0794415416798357, 2.4849066497880004, 1.0986122886681098, None, 0.0, 0.6931471805599453, None, 1.791759469228055, 2.4849066497880004, 2.772588722239781, None, 2.772588722239781, 0.0, 0.6931471805599453, None, 0.6931471805599453, None, None, 1.791759469228055, None, None, 0.6931471805599453, None, 0.6931471805599453, 2.995732273553991, 2.302585092994046, 1.9459101490553132, 1.9459101490553132, None, 1.6094379124341003, 1.0986122886681098, 1.6094379124341003, 1.9459101490553132, 0.6931471805599453, 2.8903717578961645, None, None, 1.9459101490553132, 1.9459101490553132, 2.3978952727983707, None, 1.6094379124341003, 2.1972245773362196, None, 0.6931471805599453, 1.791759469228055, 1.3862943611198906, 1.9459101490553132, 1.9459101490553132, 0.0, None, None, 1.791759469228055, None, None, None, 1.0986122886681098, 1.9459101490553132, None, 1.0986122886681098, 0.6931471805599453, 1.6094379124341003, 1.791759469228055, 2.995732273553991, 1.791759469228055, 1.3862943611198906, None, 2.3978952727983707, None, 2.6390573296152584, 0.0, 1.9459101490553132, 1.6094379124341003, 1.0986122886681098, None, 2.302585092994046, 2.9444389791664403, 1.791759469228055, None, 0.6931471805599453, 1.0986122886681098, 1.6094379124341003, 
0.6931471805599453, 2.302585092994046, 1.3862943611198906, None, 1.6094379124341003, None, None, 1.6094379124341003, 1.0986122886681098, 1.3862943611198906, None, 0.0, 1.0986122886681098, None, 1.0986122886681098, 0.0, None, 1.0986122886681098, None, None, 1.9459101490553132, 1.9459101490553132, None, None, 1.9459101490553132, None, None, 2.0794415416798357, 2.6390573296152584, 3.2188758248682006, None, 0.6931471805599453, 3.091042453358316, None, 1.3862943611198906, 1.0986122886681098, None, 2.1972245773362196, None, None, 1.0986122886681098, 1.0986122886681098, None, None, 1.0986122886681098, None, None, 0.0, 2.995732273553991, 2.302585092994046, None, 0.0, 0.0, 2.833213344056216, None, 1.6094379124341003, None, None, None, 1.9459101490553132, 1.3862943611198906, 0.0, 1.0986122886681098, None, 1.3862943611198906, 1.6094379124341003, None, 1.791759469228055, 1.0986122886681098, None, 1.0986122886681098, 1.3862943611198906, 0.6931471805599453, None, 1.9459101490553132, 1.6094379124341003, 1.3862943611198906, 1.9459101490553132, 2.5649493574615367, None, None, 2.302585092994046, 3.58351893845611, 0.6931471805599453, None, 2.70805020110221, None, 2.6390573296152584, 0.0, 3.6109179126442243, 2.1972245773362196, None, None, 1.3862943611198906, 0.0, None, None, 1.9459101490553132, 0.0, 0.6931471805599453, None, 1.791759469228055, 2.0794415416798357, 0.6931471805599453, None, None, 1.6094379124341003, 1.3862943611198906, 2.6390573296152584, None, None, None, None, 2.1972245773362196, None, None, 0.0, None, None, 0.0, None, 2.1972245773362196, 1.3862943611198906, None, 3.4011973816621555, None, 1.6094379124341003, 1.6094379124341003, 1.0986122886681098, 2.3978952727983707, 0.0, 0.6931471805599453, None, None, 1.0986122886681098, 1.3862943611198906, None, 1.6094379124341003, None, 1.6094379124341003, 2.995732273553991, 1.0986122886681098, 2.0794415416798357, None, None, 0.6931471805599453, None, 1.9459101490553132, 0.6931471805599453, None, 1.6094379124341003, 
1.0986122886681098, None, None, None, 3.044522437723423, 1.791759469228055, 1.9459101490553132, None, 1.6094379124341003, 2.1972245773362196, 1.9459101490553132, None, 0.6931471805599453, 1.3862943611198906, 1.9459101490553132, 2.1972245773362196, None, None, 1.9459101490553132, None, 1.0986122886681098, None, 0.6931471805599453, 1.9459101490553132, None, 0.6931471805599453, 1.6094379124341003, 0.6931471805599453, None, 1.3862943611198906, 1.3862943611198906, 2.0794415416798357, 2.0794415416798357, 2.6390573296152584, 0.0, None, 2.1972245773362196, None, None, 2.3978952727983707, 0.0, 0.6931471805599453, 1.9459101490553132, None, 2.302585092994046, 1.6094379124341003, None, 0.0, None, None, 0.0, 2.995732273553991, 1.0986122886681098, None, 1.3862943611198906, 2.302585092994046, None, 0.0, 2.302585092994046, None, None, 1.791759469228055, None, None, 0.6931471805599453, None, 0.0, None, 3.091042453358316, 2.70805020110221, 1.3862943611198906, 1.3862943611198906, None, 0.6931471805599453, 1.9459101490553132, 2.1972245773362196, 3.091042453358316, None, 0.0, None, 2.5649493574615367, 0.6931471805599453, None, 1.3862943611198906, 1.9459101490553132, 2.6390573296152584, 3.8066624897703196, 2.833213344056216, None, None, 0.0, 2.6390573296152584, 0.0, 1.791759469228055, None, 0.6931471805599453, None, None, 0.6931471805599453, 1.791759469228055, 1.6094379124341003, 3.4011973816621555, None, 1.3862943611198906, 2.0794415416798357, None, None, None, 1.3862943611198906, None, 2.8903717578961645, None, None, 0.6931471805599453, None, 0.0, None, 3.8066624897703196, None, 1.6094379124341003, None, None, 1.3862943611198906, 2.302585092994046, None, 1.3862943611198906, 1.791759469228055, 2.6390573296152584, 1.791759469228055, 2.1972245773362196, None, None, 2.833213344056216, None, 1.0986122886681098, None, 0.0, 2.4849066497880004, None, 2.1972245773362196, None, None, None, None, 2.0794415416798357, None, 1.3862943611198906, 2.6390573296152584, 1.791759469228055, 
1.9459101490553132, None, 2.3978952727983707, None, 1.9459101490553132, 0.6931471805599453, 1.3862943611198906, None, 2.4849066497880004, 2.4849066497880004, None, 0.0, 1.3862943611198906, 1.791759469228055, 1.9459101490553132, 1.0986122886681098, 1.791759469228055, None, None, 1.3862943611198906, None, 0.6931471805599453, 1.9459101490553132, None, 2.4849066497880004, 0.6931471805599453, None, 1.3862943611198906, 1.0986122886681098, None] + self.assertEqual(doc._.logsensenums,logsensenums) + + def test_mean_logNSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + mean_logNSenses = 1.5472290136608735 + self.assertEqual(doc._.mean_logNSenses,mean_logNSenses) + + def test_med_logNSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_logNSenses,1.4978661367769954) + + def test_max_logNSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_logNSenses,4.04305126783455) + + def test_min_logNSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_logNSenses,0.0) + + def test_std_logNSenses(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_logNSenses,0.9139907101609602) + + + def test_morpholex(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + morpholex = [None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(state)}>ment>', 'ROOT1_PFMF': '3.44', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541', 'SUFF1_PFMF': '1.04', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(link)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '82099'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 
'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'RB', 'Nmorph': '4', 'PRS_signature': '"0,1,3"', 'MorphoLexSegm': '{(neg)>ate>}>ive>>ly>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '29916', 'SUFF1_PFMF': '29.7', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '22.79', 'SUFF2_FamSize': '580', 'SUFF2_Freq_HAL': '1037354', 'SUFF2_length': '3', 'SUFF2_P': '1.74E-05', 'SUFF2_P*': '2.78E-03', 'SUFF3_PFMF': '12.7', 'SUFF3_FamSize': '2898', 'SUFF3_Freq_HAL': '3857999', 'SUFF3_length': '2', 'SUFF3_P': '4.48E-05', 'SUFF3_P*': '0.02'}, None, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, {'POS': 'VB|NN|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(play)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '36', 'ROOT1_Freq_HAL': '458704'}, None, {'POS': 'JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(recent)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '134614'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(experience)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '297837'}, None, None, {'POS': 'JJ|NN|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(past)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '90058'}, {'POS': 
'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(century)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '39167'}, None, {'POS': 'RB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(sure)}>ly>', 'ROOT1_PFMF': '13.04', 'ROOT1_FamSize': '24', 'ROOT1_Freq_HAL': '362162', 'SUFF1_PFMF': '1.89', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(time)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '1099121'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(history)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '124154', 'SUFF1_PFMF': '0.44', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(live)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '291215'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(life)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '240644'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(change)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '348247'}, None, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(drama)}>ic>>ly>', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '16813', 'SUFF1_PFMF': '6.41', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03', 'SUFF2_PFMF': '5.62', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': 
'3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, None, None, {'POS': 'JJ|NN|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(quick)}', 'ROOT1_PFMF': '9.09', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '92596'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '9.43', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '7.69', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '38887', 'SUFF1_PFMF': '12.14', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(typo)}>al>', 'ROOT1_PFMF': '2.85', 'ROOT1_FamSize': '36', 'ROOT1_Freq_HAL': '282037', 'SUFF1_PFMF': '3.42', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(day)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '35', 'ROOT1_Freq_HAL': '778343'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(reveal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '32067'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'VB', 'Nmorph': '4', 'PRS_signature': '"1,1,2"', 'MorphoLexSegm': '{ion>}>ize>', 'PREF1_PFMF': '37.55', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '58.82', 'ROOT1_FamSize': 
'18', 'ROOT1_Freq_HAL': '158531', 'SUFF1_PFMF': '38.92', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03', 'SUFF2_PFMF': '20.74', 'SUFF2_FamSize': '430', 'SUFF2_Freq_HAL': '443161', 'SUFF2_length': '3', 'SUFF2_P': '1.49E-04', 'SUFF2_P*': '0.01'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(world)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '345235'}, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{al>}', 'PREF1_PFMF': '3.08', 'PREF1_FamSize': '358', 'PREF1_Freq_HAL': '2402388', 'PREF1_length': '2', 'PREF1_P': '6.66E-06', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '84483', 'SUFF1_PFMF': '1.95', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(combust)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '1982', 'SUFF1_PFMF': '27.97', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(engine)}', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '194206'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(work)}{(day)}', 'ROOT1_PFMF': '38.82', 'ROOT1_FamSize': '86', 'ROOT1_Freq_HAL': '1051110', 'ROOT2_PFMF': '67.64', 'ROOT2_FamSize': '35', 'ROOT2_Freq_HAL': '778343'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(chance)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '72023'}, None, 
{'POS': 'JJ|RB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(high)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '24', 'ROOT1_Freq_HAL': '339513'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(employ)}>ee>', 'ROOT1_PFMF': '12.5', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '116155', 'SUFF1_PFMF': '4.25', 'SUFF1_FamSize': '48', 'SUFF1_Freq_HAL': '130049', 'SUFF1_length': '2', 'SUFF1_P': '1.54E-05', 'SUFF1_P*': '3.09E-04'}, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 'er>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '344318', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '510787', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN|JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(silic)>on>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '12811', 'SUFF1_PFMF': '10.52', 'SUFF1_FamSize': '20', 'SUFF1_Freq_HAL': '128335', 'SUFF1_length': '2', 'SUFF1_P': '0', 'SUFF1_P*': '0'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(bridge)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '26961'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(micro)>on>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '2788', 'SUFF1_PFMF': '47.36', 'SUFF1_FamSize': '20', 'SUFF1_Freq_HAL': '128335', 'SUFF1_length': '2', 'SUFF1_P': '0', 'SUFF1_P*': '0'}, {'POS': 'JJ|RB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(wide)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': 
'118804'}, None, {'POS': 'minor|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(upon)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '78918'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(leave)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '122742'}, {'POS': 'NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(home)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '29', 'ROOT1_Freq_HAL': '216780'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(family)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '145491'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(member)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '163906'}, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(reach)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '76970'}, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(wire)}>less>', 'ROOT1_PFMF': '8.33', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '37645', 'SUFF1_PFMF': '0.54', 'SUFF1_FamSize': '368', 'SUFF1_Freq_HAL': '158354', 'SUFF1_length': '4', 'SUFF1_P': '1.20E-04', 'SUFF1_P*': '2.94E-03'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(net)}{(work)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '297666', 'ROOT2_PFMF': '1.17', 'ROOT2_FamSize': '86', 'ROOT2_Freq_HAL': '1051110'}, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(util)>ize>}', 'ROOT1_PFMF': '20', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '57081', 'SUFF1_PFMF': '1.63', 'SUFF1_FamSize': '430', 'SUFF1_Freq_HAL': '443161', 'SUFF1_length': '3', 'SUFF1_P': '1.49E-04', 'SUFF1_P*': '0.01'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(satellite)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 
'ROOT1_Freq_HAL': '19083'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(orbit)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '11239'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(earth)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '77215'}, None, None, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(common)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '142732'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(occur)}>ance>', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '53283', 'SUFF1_PFMF': '13.04', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(could)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '610350'}, None, None, {'POS': 'JJ', 'Nmorph': '4', 'PRS_signature': '"2,1,1"', 'MorphoLexSegm': 'able>', 'PREF1_PFMF': '18.87', 'PREF1_FamSize': '658', 'PREF1_Freq_HAL': '610746', 'PREF1_length': '2', 'PREF1_P': '4.26E-05', 'PREF1_P*': '4.02E-03', 'PREF2_PFMF': '42.27', 'PREF2_FamSize': '370', 'PREF2_Freq_HAL': '1256048', 'PREF2_length': '2', 'PREF2_P': '1.51E-05', 'PREF2_P*': '2.94E-03', 'ROOT1_PFMF': '42.1', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '181726', 'SUFF1_PFMF': '19.74', 'SUFF1_FamSize': '872', 'SUFF1_Freq_HAL': '1227992', 'SUFF1_length': '4', 'SUFF1_P': '3.18E-05', 'SUFF1_P*': '6.03E-03'}, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(turn)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '195606'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(century)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '39167'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 
'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(state)}>ment>', 'ROOT1_PFMF': '3.44', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541', 'SUFF1_PFMF': '1.04', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(attempt)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '78959'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(bridge)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '26961'}, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(drama)}>ic>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '16813', 'SUFF1_PFMF': '4.73', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(change)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '348247'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>}', 'PREF1_PFMF': '7.63', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '10.6', 'ROOT1_FamSize': '67', 'ROOT1_Freq_HAL': '559015', 'SUFF1_PFMF': '8.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(able)}>ity>', 'ROOT1_PFMF': '10', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '328113', 'SUFF1_PFMF': '0.86', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 
'ROOT1_Freq_HAL': '222937'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, None, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(assumption)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '17588'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ance>', 'ROOT1_PFMF': '71.42', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '46378', 'SUFF1_PFMF': '22.98', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(neg)>ate>}', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '29916', 'SUFF1_PFMF': '22.11', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(need)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '591543'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(create)}>ive>>ly>', 'ROOT1_PFMF': 
'63.63', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932', 'SUFF1_PFMF': '30.39', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1037354', 'SUFF1_length': '3', 'SUFF1_P': '1.74E-05', 'SUFF1_P*': '2.78E-03', 'SUFF2_PFMF': '17.12', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(solve)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '15', 'ROOT1_Freq_HAL': '67167'}, {'POS': 'JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(previous)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '87239'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(quandary)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '184'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(look)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '628585'}, {'POS': 'RB|JJ|NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(back)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '69', 'ROOT1_Freq_HAL': '549038'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>}', 'PREF1_PFMF': '8.33', 'PREF1_FamSize': '13', 'PREF1_Freq_HAL': '58022', 'PREF1_length': '5', 'PREF1_P': '0', 'PREF1_P*': '0', 'ROOT1_PFMF': '7.57', 'ROOT1_FamSize': '67', 'ROOT1_Freq_HAL': '559015', 'SUFF1_PFMF': '4.69', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'minor|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(one)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '2327675'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(could)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '610350'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': 
'{(argue)}', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '113797'}, None, {'POS': 'minor|NN|RB', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(with)}{(out)}', 'ROOT1_PFMF': '6.66', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '3580107', 'ROOT2_PFMF': '2.94', 'ROOT2_FamSize': '35', 'ROOT2_Freq_HAL': '1415807'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(car)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '121302'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(compute)}>er>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '344318', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(mobile)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '19217'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(phon)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '41', 'ROOT1_Freq_HAL': '207292'}, None, None, {'POS': 'JJ', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{al>', 'PREF1_PFMF': '5.55', 'PREF1_FamSize': '19', 'PREF1_Freq_HAL': '17644', 'PREF1_length': '4', 'PREF1_P': '1.70E-04', 'PREF1_P*': '4.63E-04', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '7461', 'SUFF1_PFMF': '15.52', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(work)}>er>', 'ROOT1_PFMF': '2.35', 'ROOT1_FamSize': '86', 'ROOT1_Freq_HAL': '1051110', 'SUFF1_PFMF': '1.75', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(would)}', 
'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '1366583'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(need)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '591543'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(find)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '435097'}, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(altern)>ate>)}', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '60362', 'SUFF1_PFMF': '5.12', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(method)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '92974'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '510787', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ate>}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '144552', 'SUFF1_PFMF': '0.1', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '0.43', 'SUFF2_FamSize': '1599', 'SUFF2_Freq_HAL': '6530204', 'SUFF2_length': '3', 'SUFF2_P': '9.03E-06', 'SUFF2_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 
'JJ|NN|RB|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(short)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '134285'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(circuit)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '24657'}, None, {'POS': 'VB|NN|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(make)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '40', 'ROOT1_Freq_HAL': '1073485'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(problem)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '510114'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(obsolete)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '4914'}, None, None, {'POS': 'RB|minor', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(how)}{(ever)}', 'ROOT1_PFMF': '20', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '1012869', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '22', 'ROOT2_Freq_HAL': '564255'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(rely)}>ance>', 'ROOT1_PFMF': '71.42', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '46378', 'SUFF1_PFMF': '22.98', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': 
'"0,1,2"', 'MorphoLexSegm': '{(necess)>ory>}>ly>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '107363', 'SUFF1_PFMF': '1.12', 'SUFF1_FamSize': '356', 'SUFF1_Freq_HAL': '725186', 'SUFF1_length': '3', 'SUFF1_P': '2.62E-05', 'SUFF1_P*': '2.94E-03', 'SUFF2_PFMF': '1.17', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ive>>ity>', 'ROOT1_PFMF': '36.36', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932', 'SUFF1_PFMF': '10.53', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1037354', 'SUFF1_length': '3', 'SUFF1_P': '1.74E-05', 'SUFF1_P*': '2.78E-03', 'SUFF2_PFMF': '11.57', 'SUFF2_FamSize': '580', 'SUFF2_Freq_HAL': '1647588', 'SUFF2_length': '3', 'SUFF2_P': '1.76E-05', 'SUFF2_P*': '4.48E-03'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(mark)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '29', 'ROOT1_Freq_HAL': '181577'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(specie)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '249'}, None, None, {'POS': 'RB|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(prior)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '43904'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(example)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '171361'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(reveal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '32067'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': 
'322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(allow)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '185956'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(convenience)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '8009'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(car)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '121302'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(compute)}>er>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '344318', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(phon)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '41', 'ROOT1_Freq_HAL': '207292'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(release)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '144416'}, {'POS': 'JJ', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(add)}>ion>>al>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '332926', 'SUFF1_PFMF': '1.25', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03', 'SUFF2_PFMF': '1.04', 'SUFF2_FamSize': '1431', 'SUFF2_Freq_HAL': '4704731', 'SUFF2_length': '2', 'SUFF2_P': '9.14E-06', 'SUFF2_P*': '6.64E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(time)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '1099121'}, None, {'POS': 'NN|VB', 
'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(live)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '291215'}, None, {'POS': 'RB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(efficient)}>ly>', 'ROOT1_PFMF': '33.33', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '19176', 'SUFF1_PFMF': '6.86', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(efficiency)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '8343'}, None, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(human)}>ity>', 'ROOT1_PFMF': '5.88', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937', 'SUFF1_PFMF': '4.14', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(tackle)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '3482'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, {'POS': 'NN', 
'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(problem)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '510114'}, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(may)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '538146'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(create)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(issue)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '221311'}, None, None, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(exist)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '141837'}, {'POS': 'minor|NN|RB', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(with)}{(out)}', 'ROOT1_PFMF': '6.66', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '3580107', 'ROOT2_PFMF': '2.94', 'ROOT2_FamSize': '35', 'ROOT2_Freq_HAL': '1415807'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(example)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '171361'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(proliferate)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '1921', 'SUFF1_PFMF': '29.97', 'SUFF1_FamSize': 
'1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '17.61', 'PREF1_FamSize': '370', 'PREF1_Freq_HAL': '1256048', 'PREF1_length': '2', 'PREF1_P': '1.51E-05', 'PREF1_P*': '2.94E-03', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '43', 'ROOT1_Freq_HAL': '651056', 'SUFF1_PFMF': '17.14', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(globe)}>al>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '38772', 'SUFF1_PFMF': '2.65', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(scale)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '32879'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '63237', 'SUFF1_PFMF': '0.52', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{al>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '38772', 'SUFF1_PFMF': '2.65', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN|VB|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(warm)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '29760'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(become)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '143379'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 
'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(concern)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '87873'}, {'POS': 'JJ', 'Nmorph': '4', 'PRS_signature': '"2,1,1"', 'MorphoLexSegm': 'able>', 'PREF1_PFMF': '18.87', 'PREF1_FamSize': '658', 'PREF1_Freq_HAL': '610746', 'PREF1_length': '2', 'PREF1_P': '4.26E-05', 'PREF1_P*': '4.02E-03', 'PREF2_PFMF': '42.27', 'PREF2_FamSize': '370', 'PREF2_Freq_HAL': '1256048', 'PREF2_length': '2', 'PREF2_P': '1.51E-05', 'PREF2_P*': '2.94E-03', 'ROOT1_PFMF': '42.1', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '181726', 'SUFF1_PFMF': '19.74', 'SUFF1_FamSize': '872', 'SUFF1_Freq_HAL': '1227992', 'SUFF1_length': '4', 'SUFF1_P': '3.18E-05', 'SUFF1_P*': '6.03E-03'}, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(horse)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '35', 'ROOT1_Freq_HAL': '39630'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(buggy)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '4547'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(gener)>ate>}>ion>', 'ROOT1_PFMF': '15.78', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '318254', 'SUFF1_PFMF': '1.7', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '2.31', 'SUFF2_FamSize': '1599', 'SUFF2_Freq_HAL': '6530204', 'SUFF2_length': '3', 'SUFF2_P': '9.03E-06', 'SUFF2_P*': '9.11E-03'}, None, {'POS': 'RB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(like)}>wise>', 'ROOT1_PFMF': '6.84', 'ROOT1_FamSize': '74', 'ROOT1_Freq_HAL': '1232275', 'SUFF1_PFMF': '5.55', 'SUFF1_FamSize': '19', 'SUFF1_Freq_HAL': '77424', 'SUFF1_length': '4', 'SUFF1_P': '2.58E-05', 'SUFF1_P*': '3.09E-04'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ance>', 'PREF1_PFMF': '14.23', 'PREF1_FamSize': '275', 'PREF1_Freq_HAL': '559431', 
'PREF1_length': '2', 'PREF1_P': '4.65E-05', 'PREF1_P*': '4.02E-03', 'ROOT1_PFMF': '33.33', 'ROOT1_FamSize': '34', 'ROOT1_Freq_HAL': '169032', 'SUFF1_PFMF': '20.8', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(oil)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '15', 'ROOT1_Freq_HAL': '39902'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(create)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(nation)}', 'ROOT1_PFMF': '9.09', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '303858'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(state)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541'}, None, None, None, {'POS': 'JJ|NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ant>', 'PREF1_PFMF': '4.01', 'PREF1_FamSize': '275', 'PREF1_Freq_HAL': '559431', 'PREF1_length': '2', 'PREF1_P': '4.65E-05', 'PREF1_P*': '4.02E-03', 'ROOT1_PFMF': '6.06', 'ROOT1_FamSize': '34', 'ROOT1_Freq_HAL': '169032', 'SUFF1_PFMF': '5.61', 'SUFF1_FamSize': '464', 'SUFF1_Freq_HAL': '1534593', 'SUFF1_length': '3', 'SUFF1_P': '7.82E-06', 'SUFF1_P*': '1.85E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(tax)}>ion>', 'ROOT1_PFMF': '33.33', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '112326', 'SUFF1_PFMF': '16.52', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(allow)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '185956'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(rule)}', 'ROOT1_PFMF': '0', 
'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '149114'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(party)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '91368'}, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ity>', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '48418', 'SUFF1_PFMF': '2.76', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(group)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '401691'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(woman)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '38', 'ROOT1_Freq_HAL': '74275'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(solut)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '83940', 'SUFF1_PFMF': '0.5', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696', 'SUFF1_PFMF': '9.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(maverick)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '1631'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(sci)>ant>>ist>}', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '204888', 'SUFF1_PFMF': '3.23', 'SUFF1_FamSize': '464', 'SUFF1_Freq_HAL': '1534593', 'SUFF1_length': '3', 'SUFF1_P': '7.82E-06', 'SUFF1_P*': '1.85E-03', 'SUFF2_PFMF': '0.21', 'SUFF2_FamSize': '462', 
'SUFF2_Freq_HAL': '382916', 'SUFF2_length': '3', 'SUFF2_P': '6.27E-05', 'SUFF2_P*': '3.71E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(politic)}>ian>', 'ROOT1_PFMF': '10', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '92034', 'SUFF1_PFMF': '4.04', 'SUFF1_FamSize': '174', 'SUFF1_Freq_HAL': '394113', 'SUFF1_length': '3', 'SUFF1_P': '2.79E-05', 'SUFF1_P*': '1.70E-03'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(contrast)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '10700'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(state)}>ment>', 'ROOT1_PFMF': '3.44', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541', 'SUFF1_PFMF': '1.04', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, None, None, None, {'POS': 'RB|JJ|NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(even)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '494850'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(see)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '21', 'ROOT1_Freq_HAL': '800097'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN', 'Nmorph': '2', 
'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(imagine)}>ion>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696', 'SUFF1_PFMF': '9.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(consider)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '214085'}, None, None, {'POS': 'NN|JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(digit)}>al>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '53861', 'SUFF1_PFMF': '1.32', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>}', 'PREF1_PFMF': '5.42', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '11.76', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '158531', 'SUFF1_PFMF': '6.19', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(advent)}', 'ROOT1_PFMF': '9.09', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '28013'}, None, None, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(allow)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '185956'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 'ar>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '19', 'ROOT1_Freq_HAL': '93743', 'SUFF1_PFMF': '2.34', 'SUFF1_FamSize': '129', 'SUFF1_Freq_HAL': '533962', 'SUFF1_length': '2', 'SUFF1_P': '7.49E-06', 'SUFF1_P*': '6.18E-04'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(portal)}', 'ROOT1_PFMF': '0', 
'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '2394'}, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(medic)>al>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '65461', 'SUFF1_PFMF': '1.18', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(inform)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '510787', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(permit)}', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '48680'}, {'POS': 'JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(patient)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '32789'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(self)}', 'ROOT1_PFMF': '22.72', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '458870'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ly>', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '87239', 'SUFF1_PFMF': '1.82', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(close)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '188225'}, None, None, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(medic)>al>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '65461', 'SUFF1_PFMF': '1.18', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 
'MorphoLexSegm': '{(lay)}(man)', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '25', 'ROOT1_Freq_HAL': '55753', 'ROOT2_PFMF': '10', 'ROOT2_FamSize': '211', 'ROOT2_Freq_HAL': '300379'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ory>', 'PREF1_PFMF': '20.35', 'PREF1_FamSize': '114', 'PREF1_Freq_HAL': '343106', 'PREF1_length': '5', 'PREF1_P': '8.74E-06', 'PREF1_P*': '4.63E-04', 'ROOT1_PFMF': '20', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '13680', 'SUFF1_PFMF': '21.97', 'SUFF1_FamSize': '356', 'SUFF1_Freq_HAL': '725186', 'SUFF1_length': '3', 'SUFF1_P': '2.62E-05', 'SUFF1_P*': '2.94E-03'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': 'ion>', 'PREF1_PFMF': '4.42', 'PREF1_FamSize': '114', 'PREF1_Freq_HAL': '343106', 'PREF1_length': '5', 'PREF1_P': '8.74E-06', 'PREF1_P*': '4.63E-04', 'ROOT1_PFMF': '17.24', 'ROOT1_FamSize': '59', 'ROOT1_Freq_HAL': '716577', 'SUFF1_PFMF': '7.07', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '19.88', 'PREF1_FamSize': '358', 'PREF1_Freq_HAL': '2402388', 'PREF1_length': '2', 'PREF1_P': '6.66E-06', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '6.25', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '20108', 'SUFF1_PFMF': '19.14', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(arrive)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '37242'}, None, None, None, {'POS': 'JJ|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(surprise)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '52522'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(corner)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 
'ROOT1_Freq_HAL': '32765'}, None, None, None, None, {'POS': 'minor|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(one)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '2327675'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(architect)}', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '30688'}, None, None, None, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(base)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '341952'}, None, {'POS': 'NN|minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(idea)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '203042'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(emergency)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '14348'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(care)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '174627'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(technique)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '44405'}, None, None, {'POS': 'JJ|RB', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': 'ly>', 'PREF1_PFMF': '0.87', 'PREF1_FamSize': '798', 'PREF1_Freq_HAL': '538545', 'PREF1_length': '2', 'PREF1_P': '7.24E-05', 'PREF1_P*': '6.03E-03', 'ROOT1_PFMF': '4.1', 'ROOT1_FamSize': '74', 'ROOT1_Freq_HAL': '1232275', 'SUFF1_PFMF': '2.55', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(marriage)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '27179'}, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(econom)>ic>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 
'ROOT1_Freq_HAL': '84011', 'SUFF1_PFMF': '0.49', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(medicine)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '21940'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(heal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '19345'}, {'POS': 'JJ|NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(tense)}', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '11367'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(environ)}>ment>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '105667', 'SUFF1_PFMF': '0.69', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, None, None, None, None, None, None, None, None, None, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(last)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '306761'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(example)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '171361'}, {'POS': 'VB|NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(act)>al>}>y>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '59', 'ROOT1_Freq_HAL': '716577', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': 
'9.14E-06', 'SUFF1_P*': '6.64E-03', 'SUFF2_PFMF': '0.12', 'SUFF2_FamSize': '2486', 'SUFF2_Freq_HAL': '3870233', 'SUFF2_length': '1', 'SUFF2_P': '4.99E-05', 'SUFF2_P*': '0.02'}, {'POS': 'VB|NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ity>', 'ROOT1_PFMF': '5.88', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937', 'SUFF1_PFMF': '4.14', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ance>', 'ROOT1_PFMF': '71.42', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '46378', 'SUFF1_PFMF': '22.98', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 'ate>}>ion>', 'PREF1_PFMF': '13.68', 'PREF1_FamSize': '96', 'PREF1_Freq_HAL': '397964', 'PREF1_length': '1', 'PREF1_P': '1.76E-05', 'PREF1_P*': '1.08E-03', 'ROOT1_PFMF': '66.66', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '33227', 'SUFF1_PFMF': '15.17', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '19.21', 'SUFF2_FamSize': '1599', 'SUFF2_Freq_HAL': '6530204', 'SUFF2_length': '3', 'SUFF2_P': '9.03E-06', 'SUFF2_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(small)}{(pox)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '191331', 'ROOT2_PFMF': '33.33', 'ROOT2_FamSize': '4', 
'ROOT2_Freq_HAL': '1740'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(disease)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '30119'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(ravage)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '1089'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(race)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '111227'}, {'POS': 'minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(since)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '320454'}, {'POS': 'JJ', 'Nmorph': '4', 'PRS_signature': '"1,1,2"', 'MorphoLexSegm': 'ic>}>al>', 'PREF1_PFMF': '81.34', 'PREF1_FamSize': '135', 'PREF1_Freq_HAL': '155948', 'PREF1_length': '3', 'PREF1_P': '1.03E-04', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '83.33', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '124154', 'SUFF1_PFMF': '78.47', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03', 'SUFF2_PFMF': '83.77', 'SUFF2_FamSize': '1431', 'SUFF2_Freq_HAL': '4704731', 'SUFF2_length': '2', 'SUFF2_P': '9.14E-06', 'SUFF2_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(day)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '35', 'ROOT1_Freq_HAL': '778343'}, None, None, {'POS': 'RB|minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(yet)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '159994'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 
'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(vaccine)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '13304'}, None, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, {'POS': 'VB|NN|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(dare)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '15225'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(imagine)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(world)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '345235'}, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(small)}{(pox)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '191331', 'ROOT2_PFMF': '33.33', 'ROOT2_FamSize': '4', 'ROOT2_Freq_HAL': '1740'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(use)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '15', 'ROOT1_Freq_HAL': '1414857'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 
'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(battle)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '40439'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(plan)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '145043'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(draw)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '76896'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(small)}{(pox)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '191331', 'ROOT2_PFMF': '33.33', 'ROOT2_FamSize': '4', 'ROOT2_Freq_HAL': '1740'}, None, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(system)}>ic>>ly>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '688810', 'SUFF1_PFMF': '14.11', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03', 'SUFF2_PFMF': '10.42', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(target)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '36927'}, None, {'POS': 'VB', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ate>}', 'PREF1_PFMF': '23.15', 'PREF1_FamSize': '96', 'PREF1_Freq_HAL': '397964', 'PREF1_length': '1', 'PREF1_P': '1.76E-05', 'PREF1_P*': '1.08E-03', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '13255', 'SUFF1_PFMF': '27.02', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': 
'2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01'}, None, None, None, None, {'POS': 'RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(always)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '200870'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(mark)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '29', 'ROOT1_Freq_HAL': '181577'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(experience)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '297837'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(discover)}>y>', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '44840', 'SUFF1_PFMF': '2.45', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(fire)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '50', 'ROOT1_Freq_HAL': '118119'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '3.64', 'PREF1_FamSize': '358', 'PREF1_Freq_HAL': '2402388', 'PREF1_length': '2', 'PREF1_P': '6.66E-06', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '88079', 'SUFF1_PFMF': '2.56', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(give)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '455818'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(history)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': 
'13', 'ROOT1_Freq_HAL': '124154', 'SUFF1_PFMF': '0.44', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(race)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '111227'}, None, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(limit)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '155817'}, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(number)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '364121'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(problem)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '510114'}, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(old)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '281185'}, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(tackle)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '3482'}, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(need)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '591543'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(retreat)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '4397'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': 
'{(attitude)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '26046'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(thing)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '1328712'}, None, None, {'POS': 'RB|minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(rather)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '146049'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(embrace)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '5662'}, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(hope)}>ful>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '208147', 'SUFF1_PFMF': '10.23', 'SUFF1_FamSize': '343', 'SUFF1_Freq_HAL': '429561', 'SUFF1_length': '3', 'SUFF1_P': '5.82E-05', 'SUFF1_P*': '3.86E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(posture)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '2967'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(possible)}>ity>', 'ROOT1_PFMF': '40', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '259443', 'SUFF1_PFMF': '2.24', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB|NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 
'{ion>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696', 'SUFF1_PFMF': '9.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None] + self.assertEqual(doc._.morpholex,morpholex) + + def test_morpholexsegm(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + + morpholexsegm = [None, '{(state)}>ment>', '{(link)}', '{(tech)(log)>y>}', '{(neg)>ate>}>ive>>ly>', None, '{(free)}', '{(think)}', '{(play)}', None, '{(recent)}', '{(human)}', '{(experience)}', None, None, '{(past)}', '{(century)}', None, '{(sure)}>ly>', None, None, None, None, '{(time)}', None, '{(history)>y>}', None, None, '{(live)}', '{(live)}', None, '{(people)}', None, '{(change)}', None, '{(drama)}>ic>>ly>', None, None, '{(quick)}', '{ion>', None, None, '{(typo)}>al>', '{(day)}', '{(reveal)}', None, '{(tech)(log)>y>}', None, '{ion>}>ize>', None, '{(world)}', None, None, '{(people)}', '{al>}', '{(combust)}>ion>', '{(engine)}', None, None, None, '{(work)}{(day)}', None, '{(chance)}', None, '{(high)}', None, None, '{(employ)}>ee>', None, 'er>', None, '{ion>', None, '{(silic)>on>}', '{(bridge)}', None, None, None, '{(micro)>on>}', '{(wide)}', None, '{(upon)}', '{(leave)}', '{(home)}', None, '{(family)}', '{(member)}', None, None, '{(reach)}', None, '{(wire)}>less>', '{(net)}{(work)}', None, '{(util)>ize>}', '{(satellite)}', '{(orbit)}', None, '{(earth)}', None, None, None, None, '{(common)}', '{(occur)}>ance>', '{(could)}', None, None, 'able>', None, None, '{(turn)}', None, None, None, '{(century)}', None, None, None, '{(state)}>ment>', '{(attempt)}', None, '{(bridge)}', None, '{(drama)}>ic>', '{(change)}', None, None, '{ion>}', None, None, '{(able)}>ity>', None, '{(human)}', None, '{(think)}', None, None, None, None, '{(assumption)}', None, None, None, '{ance>', None, '{(tech)(log)>y>}', '{(neg)>ate>}', None, '{(need)}', None, '{(people)}', None, '{(think)}', '{(create)}>ive>>ly>', None, 
'{(solve)}', '{(previous)}', '{(quandary)}', None, '{(look)}', '{(back)}', None, None, '{ion>}', None, '{(one)}', '{(could)}', '{(argue)}', None, '{(with)}{(out)}', None, '{(car)}', None, '{(compute)}>er>', None, None, '{(mobile)}', '{(phon)}', None, None, '{al>', '{(work)}>er>', '{(would)}', '{(need)}', None, '{(find)}', '{(altern)>ate>)}', '{(method)}', None, '{ion>', '{ate>}>ion>', None, '{(tech)(log)>y>}', '{(short)}', '{(circuit)}', None, '{(think)}', None, '{(make)}', None, '{(problem)}', '{(obsolete)}', None, None, '{(how)}{(ever)}', None, None, '{(rely)}>ance>', None, '{(tech)(log)>y>}', None, None, '{(necess)>ory>}>ly>', '{ive>>ity>', None, '{(mark)}', None, '{(human)}', '{(species)}', None, None, '{(prior)}', '{(example)}', '{(reveal)}', None, '{(tech)(log)>y>}', '{(allow)}', None, '{(convenience)}', None, None, '{(car)}', None, '{(compute)}>er>', None, '{(phon)}', None, '{(release)}', '{(add)}>ion>>al>', '{(time)}', None, '{(people)}', None, '{(live)}', None, '{(efficient)}>ly>', None, None, '{(efficiency)}', None, None, '{y>}', '{(free)}', '{(human)}>ity>', None, None, None, '{(tackle)}', '{(new)}', '{(problem)}', None, None, '{(may)}', None, '{(create)}', '{(new)}', '{(issue)}', None, None, None, '{(exist)}', '{(with)}{(out)}', '{(tech)(log)>y>}', None, None, '{(example)}', None, None, '{(proliferate)}>ion>', None, '{ion>', None, None, '{(globe)}>al>', '{(scale)}', None, None, '{y>}', '{al>', '{(warm)}', '{(become)}', None, '{(concern)}', 'able>', None, None, '{(horse)}', None, None, None, '{(buggy)}', '{(gener)>ate>}>ion>', None, '{(like)}>wise>', '{ance>', None, '{(oil)}', None, '{(create)}', '{(nation)}', None, '{(state)}', None, None, None, '{ant>', None, '{(tax)}>ion>', None, '{(allow)}', '{(rule)}', '{(party)}', None, '{ity>', '{(group)}', None, None, '{(women)}', None, '{(solut)}>ion>', None, None, '{ion>', None, '{(maverick)}', '{(sci)>ant>>ist>}', None, '{(politic)}>ian>', None, None, None, '{(contrast)}', None, None, '{(state)}>ment>', None, 
None, None, '{(even)}', '{(see)}', None, '{(tech)(log)>y>}', '{(free)}', None, '{(human)}', '{(imagine)}>ion>', None, '{(consider)}', None, None, '{(digit)}>al>', '{ion>}', None, None, '{(advent)}', None, None, None, None, '{(allow)}', None, None, 'ar>}', None, '{(portal)}', None, '{(medic)>al>}', '{(inform)}>ion>', None, '{(permit)}', '{(patient)}', None, '{(self)}', '{ly>', '{(close)}', None, None, None, '{(medic)>al>}', '{(lay)}(man)', None, None, '{ory>', 'ion>', None, '{ion>', None, '{(arrive)}', None, None, None, '{(surprise)}', '{(corner)}', None, None, None, None, '{(one)}', None, None, '{(architect)}', None, None, None, '{(mill)>en>>ium>}', '{(develop)}>ment>', '{(goal)}', None, '{(base)}', None, '{(idea)}', None, '{(emergency)}', '{(care)}', None, '{(technique)}', None, None, 'ly>', '{(marriage)}', None, '{(econom)>ics>}', None, '{(medicine)}', None, '{(heal)}', '{(tense)}', None, None, '{(environ)}>ment>', None, '{(south)}', '{(america)}', None, '{(east)}>ern>', '{(europe)}', None, None, None, '{(last)}', '{(example)}', '{y>}', '{(act)>al>}>y>', '{ity>', None, None, '{ance>', None, '{(tech)(log)>y>}', None, 'ate>}>ion>', None, '{(small)}{(pox)}', None, None, '{(disease)}', None, '{(ravage)}', None, '{(human)}', '{(race)}', '{(since)}', 'ic>}>al>', '{(day)}', None, None, '{(yet)}', None, None, '{(tech)(log)>y>}', None, '{(vaccine)}', None, '{(free)}', '{(think)}', '{(human)}', '{(dare)}', None, '{(imagine)}', None, '{(world)}', '{(free)}', None, '{(small)}{(pox)}', None, '{(use)}', '{(tech)(log)>y>}', None, '{(battle)}', '{(plan)}', None, '{(draw)}>n>', None, None, None, '{(small)}{(pox)}', None, '{(system)}>ic>>ly>', None, None, '{ate>}', None, None, '{(tech)(log)>y>}', None, '{(always)}', '{(mark)}', None, '{(human)}', '{(experience)}', None, None, None, '{(discover)}>y>', None, '{(fire)}', None, None, '{ion>', None, None, None, '{(give)}>en>', None, '{(history)>y>}', None, None, '{(human)}', '{(race)}', None, None, None, None, None, '{(limit)}', None, 
None, '{(number)}', None, '{(problem)}', None, None, '{(new)}', None, '{(old)}', None, None, None, None, '{(tackle)}', None, None, None, None, '{(need)}', None, '{(retreat)}', None, None, '{(ludd)}>ite>', '{(attitude)}', None, '{(new)}', '{(thing)}', None, None, '{(rather)}', '{(embrace)}', None, '{(hope)}>ful>', '{(posture)}', None, None, '{(possible)}>ity>', None, '{(tech)(log)>y>}', '{ion>', None] + self.assertEqual(doc._.morpholexSegm,morpholexsegm) + + def test_morphnums(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + morphnums = [None, '2', '1', '3', '4', None, '1', '1', '1', None, '1', '1', '1', None, None, '1', '1', None, '2', None, None, None, None, '1', None, '2', None, None, '1', '1', None, '1', None, '1', None, '3', None, None, '1', '3', None, None, '2', '1', '1', None, '3', None, '4', None, '1', None, None, '1', '2', None, '1', None, None, '2', None, '1', None, None, '3', '2', '1', None, None, None, '2', None, '1', None, '1', None, None, '2', None, '2', None, None, '2', None, '2', '2', None, '2', '1', None, None, None, '2', '1', None, '1', '1', '1', None, '1', '1', None, None, '1', None, '2', '2', None, '2', '1', '1', None, '1', None, None, None, None, '1', '2', '1', None, None, '4', None, None, '1', None, None, None, '1', None, None, None, '2', '1', None, '1', None, '2', '1', None, None, '3', None, None, '2', None, '1', None, '1', None, None, None, None, '1', None, None, None, '2', '2', None, '3', '2', None, '1', None, '1', None, '1', '3', None, '1', '1', '1', None, '1', '1', None, None, '3', None, '1', '1', '1', None, '2', None, '1', None, '2', None, None, '1', '1', None, None, '3', '2', '1', '1', None, '1', '2', '1', None, '2', None, '2', '2', None, '3', None, '3', '1', '1', None, '1', None, '1', None, '1', '1', None, None, '2', None, None, '2', None, '3', None, None, '3', '2', None, '3', None, '1', None, '1', '1', None, None, '1', '1', '1', None, '3', '1', None, '1', None, None, '1', None, '2', None, '1', None, '1', '3', '1', None, 
'1', None, '1', None, '2', None, None, '1', None, None, '2', None, '1', None, '1', None, '1', None, None, None, None, '1', None, '3', '1', '2', None, None, None, '1', '1', '1', None, None, '1', None, '1', '1', '1', None, None, None, '1', '2', '3', None, None, '1', None, None, '2', None, '2', None, '2', None, '1', None, '1', '3', None, None, '2', '1', None, None, '2', '2', '2', None, '2', '1', None, '2', '1', '1', None, '1', '4', None, None, '1', None, None, None, '1', '3', None, '2', '3', None, '1', None, '1', '1', None, '1', None, None, None, '3', None, '2', None, '1', '1', '1', None, '2', '2', '1', None, None, '1', None, '2', None, None, '2', '1', '2', None, '2', '2', None, '1', '3', None, '2', None, None, None, '1', None, None, '2', None, None, None, '1', '1', None, '3', '1', None, '1', '2', None, '1', None, None, '2', '3', None, None, '1', None, None, None, None, '1', None, None, '2', '2', None, '1', None, None, None, None, '2', None, '1', None, '2', '2', None, '1', '1', None, '1', '2', '1', None, None, None, '1', '1', '1', None, None, '1', '1', '2', None, '1', None, None, '2', '1', None, None, None, '2', '2', None, None, '2', '3', '3', None, '3', None, '1', None, None, None, '1', '1', None, None, None, None, '1', None, None, '1', None, None, None, '3', '2', '1', None, '1', None, '1', None, '1', '1', None, '1', None, None, '3', '1', None, '2', None, '1', None, '1', '1', None, None, '2', None, '1', '1', None, '2', '1', None, None, None, '1', '1', '2', None, None, '1', None, None, '3', '3', '2', '1', None, None, '1', None, '2', None, None, '2', None, '2', None, '3', None, '2', '1', None, None, None, '1', None, '1', None, None, '1', None, '1', '1', None, '1', '4', None, '2', None, None, '1', None, '1', None, '1', '1', '1', '4', '1', None, None, '1', None, None, '3', None, '1', None, '1', '1', '1', '1', None, '1', None, '1', '1', None, '2', None, '1', '3', None, '1', '1', None, '2', None, None, None, '2', None, '3', None, None, '3', None, None, '3', None, '1', '1', 
None, '1', '1', None, None, None, '2', None, '1', None, None, '3', None, None, None, '2', None, '2', None, None, '1', '1', None, None, None, None, None, '1', None, None, '1', None, '1', None, None, '1', None, '1', None, None, None, None, '1', None, None, None, None, '1', None, '1', None, None, '2', '1', None, '1', '1', None, None, '1', '1', None, '2', '1', None, None, '2', None, '3', '2', None, '1', '1', None, '1', '2', None] + self.assertEqual(doc._.morphnums,morphnums) + + def test_mean_nMorph(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + mean_nMorph = 1.5765765765765767 + self.assertEqual(doc._.mean_nMorph,mean_nMorph) + + def test_med_nMorph(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_nMorph,1.0) + + def test_max_nMorph(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_nMorph,4.0) + + def test_min_nMorph(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_nMorph,1.0) + + def test_std_nMorph(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_nMorph,0.7747858182417163) + + def test_root_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + root_freqs_HAL = [None, 451541, 82099, 322084, 29916, None, 323757, 746094, 458704, None, 134614, 222937, 297837, None, None, 90058, 39167, None, 362162, None, None, None, None, 1099121, None, 124154, None, None, 291215, 240644, None, 798705, None, 348247, None, 16813, None, None, 92596, 38887, None, None, 282037, 778343, 32067, None, 322084, None, 158531, None, 345235, None, None, 798705, 9931, None, 1051110, None, None, 19217, None, 434649, None, None, 84483, 1982, 194206, None, None, None, 778343, None, 72023, None, 339513, None, None, 116155, None, 716577, None, None, 344318, None, 220761, 510787, None, 12811, 26961, None, None, None, 2788, 118804, None, 78918, 122742, 216780, None, 145491, 163906, None, None, 76970, 
None, 37645, 297666, None, 57081, 19083, 11239, None, 77215, None, None, None, None, 142732, 53283, 610350, None, None, 181726, None, None, 195606, None, None, None, 39167, None, None, None, 451541, 78959, None, 26961, None, 16813, 348247, None, None, 559015, None, None, 328113, None, 222937, None, 746094, None, None, None, None, 17588, None, None, None, 115343, 46378, None, 322084, 29916, None, 591543, None, 798705, None, 746094, 227932, None, 67167, 87239, 184, None, 628585, 549038, None, None, 559015, None, 2327675, 610350, 113797, None, 1415807, None, 121302, None, 344318, None, None, 19217, 207292, None, None, 7461, 1051110, 1366583, 591543, None, 435097, 60362, 92974, None, 395451, None, 510787, 220761, None, 144552, None, 322084, 134285, 24657, None, 746094, None, 1073485, None, 510114, 4914, None, None, 564255, None, None, 46378, None, 322084, None, None, 107363, 506137, None, 227932, None, 181577, None, 222937, 249, None, None, 43904, 171361, 32067, None, 322084, 185956, None, 8009, None, None, 121302, None, 344318, None, 207292, None, 144416, 332926, 1099121, None, 798705, None, 291215, None, 19176, None, None, 8343, None, None, 506137, None, 591543, None, 222937, None, 746094, None, None, None, None, 364953, None, 322084, 323757, 222937, None, None, None, 3482, 973761, 510114, None, None, 538146, None, 227932, 973761, 221311, None, None, None, 141837, 1415807, 322084, None, None, 171361, None, None, 1921, None, 19217, None, 559015, None, 591543, None, 19795, 651056, None, None, 38772, 32879, None, None, 115343, 63237, 56361, None, 23748, 173720, None, 38772, 29760, 143379, None, 87873, 181726, None, None, 39630, None, None, None, 4547, 318254, None, 1232275, 169032, None, 39902, None, 227932, 303858, None, 451541, None, None, None, 169032, None, 112326, None, 185956, 149114, 91368, None, 340039, 48418, 401691, None, None, 74275, None, 83940, None, None, 38097, 510114, 311285, None, 624, 54696, None, 1631, 204888, None, 92034, None, None, None, 10700, 
None, None, 451541, None, None, None, 494850, 800097, None, 322084, 323757, None, 222937, 54696, None, 214085, None, None, 53861, 158531, None, None, 28013, None, None, None, None, 185956, None, None, 5478, 348247, None, 203042, None, None, None, None, 93743, None, 2394, None, 65461, 510787, None, 48680, 32789, None, 458870, 254138, 12221, None, None, None, 510787, 49260, 73303, None, None, 28283, 233035, 81153, None, 746094, None, None, 87239, 188225, None, None, None, 65461, 55753, None, None, 115343, 13680, 716577, None, 20108, None, 37242, None, None, None, 52522, 32765, None, None, None, None, 2327675, None, None, 30688, None, None, None, None, None, None, None, 341952, None, 203042, None, 14348, 174627, None, 44405, None, None, 1232275, 27179, None, 84011, None, 21940, None, 19345, 11367, None, None, 105667, None, None, None, None, None, None, None, None, None, 306761, 171361, 282157, None, None, 208147, None, None, 322084, 716577, 282157, 208147, None, None, 95757, None, 222937, None, None, 115343, None, 46378, None, 322084, None, 259443, 42643, None, None, None, 34271, None, 214085, None, None, 230285, None, 39167, 18126, None, 197347, 33227, None, 1740, None, None, 30119, None, 1089, None, 222937, 111227, 320454, 124154, 778343, None, None, 159994, None, None, 322084, None, 13304, None, 323757, 746094, 222937, 15225, None, 54696, None, 345235, 323757, None, 1740, None, 1414857, 322084, None, 40439, 145043, None, 76896, None, None, None, 1740, None, 688810, 36927, None, 13255, None, None, None, None, 200870, 181577, None, 222937, 297837, None, None, None, 44840, None, 118119, None, None, 88079, None, None, None, 455818, None, 124154, None, None, 222937, 111227, None, None, None, None, None, 155817, None, None, 364121, None, 510114, None, None, 973761, None, 281185, None, None, None, None, 3482, None, None, None, None, 591543, None, 4397, None, None, None, 26046, None, 973761, 1328712, None, None, 146049, 5662, None, 208147, 2967, None, None, 259443, None, 
322084, 282157, None, 973761, 21313, None, 222937, 54696, None] + self.assertEqual(doc._.root_freqs_HAL, root_freqs_HAL) + + def test_mean_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + mean_freqs_HAL = 275457.9043478261 + self.assertEqual(doc._.mean_freq_HAL, mean_freqs_HAL) + + def test_med_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_freq_HAL,181577) + + def test_max_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_freq_HAL,2327675) + + def test_min_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_freq_HAL,184) + + def test_std_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_freq_HAL,330014.93612255837) + + def test_log_root_freqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + log_root_freqs_HAL = [None, 13.020421456132732, 11.315681115098373, 12.682567660071214, 10.306148733311558, None, 12.687748513383763, 13.522606876609819, 13.036160400843732, None, 11.810166702670976, 12.314644499317684, 12.60430163597371, None, None, 11.408209186191696, 10.575589834500324, None, 12.79984690451562, None, None, None, None, 13.91002132741618, None, 11.7292780095071, None, None, 12.581817104967033, 12.391073941945889, None, 13.590746945058667, None, 12.76066727710387, None, 9.729907675678929, None, None, 11.436001223156996, 10.568415283521176, None, None, 12.549793546987297, 13.564922580040115, 10.375582742942056, None, 12.682567660071214, None, 11.97370543677171, None, 12.751980623550002, None, None, 13.590746945058667, 9.203416456903359, None, 13.86535730660883, None, None, 9.863550582932428, None, 12.982294087802849, None, None, 11.344305609673212, 7.591861714889934, 12.176674730345614, None, None, None, 13.564922580040115, None, 11.184740791431123, None, 12.735267516853085, None, None, 11.662680785055064, None, 
13.482240987328971, None, None, 12.749320927948446, None, 12.304835947349302, 13.143707952542577, None, 9.458059455856434, 10.202146656326484, None, None, None, 7.933079771880415, 11.685230355377529, None, 11.276164617696834, 11.717839870416817, 12.286638293369741, None, 11.887869508010441, 12.007048371749491, None, None, 11.251171014528364, None, 10.535955422284223, 12.603727331546361, None, 10.952226590708511, 9.856553165346915, 9.327145151518268, None, 11.254349017657102, None, None, None, None, 11.86872402499674, 10.883372609935437, 13.321787842097946, None, None, 12.110255337157891, None, None, 12.183857710959016, None, None, None, 10.575589834500324, None, None, None, 13.020421456132732, 11.276684009386841, None, 10.202146656326484, None, 9.729907675678929, 12.76066727710387, None, None, 13.233931585408705, None, None, 12.701113340230703, None, 12.314644499317684, None, 13.522606876609819, None, None, None, None, 9.7749721303007, None, None, None, 11.65566557688837, 10.744580487844381, None, 12.682567660071214, 10.306148733311558, None, 13.290489656292548, None, 13.590746945058667, None, 13.522606876609819, 12.336802617838474, None, 11.114937334467607, 11.37640675761805, 5.214935757608986, None, 13.351226540509186, 13.21592293485404, None, None, 13.233931585408705, None, 14.66038047327681, 13.321787842097946, 11.64217143828822, None, 14.163210244511161, None, 11.706038582842382, None, 12.749320927948446, None, None, 9.863550582932428, 12.241883706265792, None, None, 8.917444732471514, 13.86535730660883, 14.12782402162024, 13.290489656292548, None, 12.983324273718326, 11.008115046859025, 11.440075163155973, None, 12.887782164709064, None, 13.143707952542577, 12.304835947349302, None, 11.881394583391666, None, 12.682567660071214, 11.80771968602779, 10.11281611481661, None, 13.522606876609819, None, 13.886420923217788, None, 13.142389509133297, 8.499843553081124, None, None, 13.243261555960858, None, None, 10.744580487844381, None, 12.682567660071214, None, 
None, 11.583970895183207, 13.13456266261131, None, 12.336802617838474, None, 12.109435085145911, None, 12.314644499317684, 5.517452896464707, None, None, 10.689760711085556, 12.051527721332148, 10.375582742942056, None, 12.682567660071214, 12.133265365571027, None, 8.988321188323683, None, None, 11.706038582842382, None, 12.749320927948446, None, 12.241883706265792, None, 11.880453302626728, 12.715675522045341, 13.91002132741618, None, 13.590746945058667, None, 12.581817104967033, None, 9.86141477611422, None, None, 9.029178142902074, None, None, 13.13456266261131, None, 13.290489656292548, None, 12.314644499317684, None, 13.522606876609819, None, None, None, None, 12.807523857150143, None, 12.682567660071214, 12.687748513383763, 12.314644499317684, None, None, None, 8.155362120328135, 13.788921172637307, 13.142389509133297, None, None, 13.195885177792945, None, 12.336802617838474, 13.788921172637307, 12.307324231084891, None, None, None, 11.862433789932195, 14.163210244511161, 12.682567660071214, None, None, 12.051527721332148, None, None, 7.560601162768557, None, 9.863550582932428, None, 13.233931585408705, None, 13.290489656292548, None, 9.89318465954023, 13.386350938996445, None, None, 10.5654536156125, 10.400589435082308, None, None, 11.65566557688837, 11.054644851868005, 10.939532708935408, None, 10.075253595390565, 12.065200086648758, None, 10.5654536156125, 10.300920488947028, 11.873246752909115, None, 11.383647869252256, 12.110255337157891, None, None, 10.58734168618448, None, None, None, 8.42222295382501, 12.670605085075938, None, 14.024372612452714, 12.037843325093355, None, 10.594181726935007, None, 12.336802617838474, 12.624315765997553, None, 13.020421456132732, None, None, None, 12.037843325093355, None, 11.629160636636323, None, 12.133265365571027, 11.912466393058425, 11.422650586730443, None, 12.73681559589648, 10.787626924393198, 12.903438415316696, None, None, 11.215529700330809, None, 11.337857536887581, None, None, 10.547890817857143, 
13.142389509133297, 12.648464170238247, None, 6.436150368369428, 10.90954585959363, None, 7.396948602621014, 12.230218767358252, None, 11.429913352976165, None, None, None, 9.277999020449997, None, None, 13.020421456132732, None, None, None, 13.112009965324837, 13.592488249299878, None, 12.682567660071214, 12.687748513383763, None, 12.314644499317684, 10.90954585959363, None, 12.274128411404117, None, None, 10.89416193284752, 11.97370543677171, None, None, 10.240423967124363, None, None, None, None, 12.133265365571027, None, None, 8.608495349823023, 12.76066727710387, None, 12.221168133175508, None, None, None, None, 11.448312274378296, None, 7.780720886117918, None, 11.089209824443223, 13.143707952542577, None, 10.793023547101463, 10.397848372270873, None, 13.036522224526205, 12.445632705549428, 9.410911062438, None, None, None, 13.143707952542577, 10.804867671674529, 11.202356814731635, None, None, 10.250016196420983, 12.35894393585903, 11.304091540836126, None, 13.522606876609819, None, None, 11.37640675761805, 12.145393334718323, None, None, None, 11.089209824443223, 10.928686499533763, None, None, 11.65566557688837, 9.523690191176541, 13.482240987328971, None, 9.908873024812467, None, 10.525192435641708, None, None, None, 10.86898740842283, 10.397116151473597, None, None, None, None, 14.66038047327681, None, None, 10.331626977683165, None, None, None, None, None, None, None, 12.742425655317444, None, 12.221168133175508, None, 9.571365838652174, 12.070407549607491, None, 10.701107354692605, None, None, 14.024372612452714, 10.210199895269655, None, 11.338703021632888, None, 9.99606673382922, None, 9.870189267134613, 9.33846969968671, None, None, 11.568047918760465, None, None, None, None, None, None, None, None, None, 12.633824221750912, 12.051527721332148, 12.55021893258803, None, None, 12.245999839836097, None, None, 12.682567660071214, 13.482240987328971, 12.55021893258803, 12.245999839836097, None, None, 11.46956901142031, None, 12.314644499317684, None, 
None, 11.65566557688837, None, 10.744580487844381, None, 12.682567660071214, None, 12.466292304281597, 10.660618412837621, None, None, None, 10.442054794483777, None, 12.274128411404117, None, None, 12.347072951251615, None, 10.575589834500324, 9.805102650614726, None, 12.19271887955665, 10.411118077405577, None, 7.461640392208575, None, None, 10.312911480831472, None, 6.9930151229329605, None, 12.314644499317684, 11.619328437081467, 12.677494019301028, 11.7292780095071, 13.564922580040115, None, None, 11.982891593512822, None, None, 12.682567660071214, None, 9.495820020872763, None, 12.687748513383763, 13.522606876609819, 12.314644499317684, 9.630694092578098, None, 10.90954585959363, None, 12.751980623550002, 12.687748513383763, None, 7.461640392208575, None, 14.162539023881891, 12.682567660071214, None, 10.60754994483658, 11.884785529164079, None, 11.250209138536196, None, None, None, 7.461640392208575, None, 13.442720749992596, 10.516698269778406, None, 9.492130118723125, None, None, None, None, 12.210413211628595, 12.109435085145911, None, 12.314644499317684, 12.60430163597371, None, None, None, 10.710855877186097, None, 11.67944786985521, None, None, 11.385989418016944, None, None, None, 13.029848886018799, None, 11.7292780095071, None, None, 12.314644499317684, 11.619328437081467, None, None, None, None, None, 11.956437520715296, None, None, 12.805241508962812, None, 13.142389509133297, None, None, 13.788921172637307, None, 12.546768094679367, None, None, None, None, 8.155362120328135, None, None, None, None, 13.290489656292548, None, 8.388677769180811, None, None, None, 10.167619484527648, None, 13.788921172637307, 14.099720609922676, None, None, 11.89169746082223, 8.641532465671846, None, 12.245999839836097, 7.995306620290822, None, None, 12.466292304281597, None, 12.682567660071214, 12.55021893258803, None, 13.788921172637307, 9.967072494161238, None, 12.314644499317684, 10.90954585959363, None] + 
self.assertEqual(doc._.log_root_freqs_HAL,log_root_freqs_HAL) + + + def test_mean_logfreqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + mean_freqs_HAL = 11.702238824461727 + self.assertEqual(doc._.mean_logfreq_HAL, mean_freqs_HAL) + + def test_med_logfreqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_logfreq_HAL,12.109435085145911) + + def test_max_logfreqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_logfreq_HAL,14.66038047327681) + + def test_min_logfreqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_logfreq_HAL,5.214935757608986) + + def test_std_logfreqs_HAL(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_logfreq_HAL,1.5929041182131862) + + def test_root_pfmfs(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + root_pfmfs = [None, '3.44', '0', '0', '50', None, '0', '0', '0', None, '50', '0', '0', None, None, '0', '0', None, '13.04', None, None, None, None, '0', None, '0', None, None, '0', '0', None, '0', None, '0', None, '25', None, None, '9.09', '7.69', None, None, '2.85', '0', '0', None, '0', None, '58.82', None, '0', None, None, '0', '0', None, '0', None, None, '9.09', None, '0', None, None, '0', '0', '100', None, None, None, '38.82', None, '0', None, '0', None, None, '12.5', None, '24.13', None, None, '0', None, '0', '0', None, '0', '0', None, None, None, '0', '0', None, '0', '0', '0', None, '0', '0', None, None, '0', None, '8.33', '0', None, '20', '0', '0', None, '0', None, None, None, None, '0', '100', '0', None, None, '42.1', None, None, '0', None, None, None, '0', None, None, None, '3.44', '0', None, '0', None, '0', '0', None, None, '10.6', None, None, '10', None, '0', None, '0', None, None, None, None, '0', None, None, None, '0', '71.42', None, '0', '25', None, '0', None, '0', None, '0', '63.63', None, '0', '0', '0', None, 
'0', '0', None, None, '7.57', None, '0', '0', '16.66', None, '6.66', None, '0', None, '0', None, None, '0', '0', None, None, '16.66', '2.35', '0', '0', None, '0', '16.66', '0', None, '10.25', None, '0', '0', None, '0', None, '0', '0', '0', None, '0', None, '0', None, '0', '0', None, None, '20', None, None, '71.42', None, '0', None, None, '14.28', '44.44', None, '36.36', None, '0', None, '0', '0', None, None, '0', '0', '0', None, '0', '0', None, '0', None, None, '0', None, '0', None, '0', None, '0', '50', '0', None, '0', None, '0', None, '33.33', None, None, '0', None, None, '44.44', None, '0', None, '0', None, '0', None, None, None, None, '0', None, '0', '0', '5.88', None, None, None, '0', '0', '0', None, None, '0', None, '0', '0', '0', None, None, None, '0', '6.66', '0', None, None, '0', None, None, '0', None, '9.09', None, '4.54', None, '0', None, '0', '16.66', None, None, '0', '0', None, None, '0', '0', '0', None, '0', '0', None, '0', '0', '0', None, '0', '42.1', None, None, '0', None, None, None, '0', '15.78', None, '6.84', '33.33', None, '0', None, '0', '9.09', None, '0', None, None, None, '6.06', None, '33.33', None, '0', '0', '0', None, '18.05', '100', '0', None, None, '0', None, '0', None, None, '0', '0', '0', None, '0', '14.28', None, '0', '16.66', None, '10', None, None, None, '0', None, None, '3.44', None, None, None, '0', '0', None, '0', '0', None, '0', '14.28', None, '0', None, None, '0', '11.76', None, None, '9.09', None, None, None, None, '0', None, None, '50', '6.25', None, '0', None, None, None, None, '0', None, '0', None, '0', '0', None, '14.28', '0', None, '22.72', '0', '0', None, None, None, '11.11', '0', '0', None, None, '0', '0', '10.41', None, '0', None, None, '100', '0', None, None, None, '0', '25', None, None, '0', '20', '17.24', None, '6.25', None, '0', None, None, None, '0', '0', None, None, None, None, '0', None, None, '25', None, None, None, None, None, None, None, '0', None, '0', None, '0', '0', None, '0', None, None, '4.1', '0', None, 
'0', None, '0', None, '0', '14.28', None, None, '0', None, None, None, None, None, None, None, None, None, '0', '0', '0', None, None, '0', None, None, '0', '0', '0', '0', None, None, '0', None, '5.88', None, None, '0', None, '71.42', None, '0', None, '60', '0', None, None, None, '0', None, '0', None, None, '14.28', None, '0', '0', None, '0', '66.66', None, '50', None, None, '0', None, '0', None, '0', '0', '0', '83.33', '0', None, None, '0', None, None, '0', None, '0', None, '0', '0', '0', '0', None, '0', None, '0', '0', None, '50', None, '0', '0', None, '0', '0', None, '0', None, None, None, '50', None, '50', '0', None, '25', None, None, None, None, '0', '0', None, '0', '0', None, None, None, '25', None, '0', None, None, '16.66', None, None, None, '0', None, '0', None, None, '0', '0', None, None, None, None, None, '0', None, None, '0', None, '0', None, None, '0', None, '0', None, None, None, None, '0', None, None, None, None, '0', None, '0', None, None, None, '0', None, '0', '0', None, None, '0', '0', None, '50', '0', None, None, '40', None, '0', '0', None, '0', '0', None, '0', '14.28', None] + self.assertEqual(doc._.root_pfmfs,root_pfmfs) + + def test_mean_root_pfmf(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.mean_root_pfmf,7.700984615384615) + + def test_med_root_pfmf(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_root_pfmf,0.0) + + def test_max_root_pfmf(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_root_pfmf,100.0) + + def test_min_root_pfmf(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_root_pfmf,0.0) + + def test_std_root_pfmf(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_root_pfmf,18.250995193981897) + + def test_token_freqs(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + token_freqs = [7.73, 4.97, 3.96, 5.09, 3.69, 
6.85, 5.63, 5.24, 4.84, 6.91, 5.04, 5.35, 5.27, 6.08, 7.73, 5.36, 5.02, None, 4.55, 6.31, 6.37, 6.27, 6.35, 6.29, 7.27, 5.39, 6.0, 7.73, 4.88, 5.14, 7.4, 6.25, 6.71, 5.07, 6.36, 3.95, None, 7.36, 4.98, 4.17, 6.91, 7.36, 4.59, 5.95, 4.34, 6.24, 5.09, 6.37, 2.86, 7.73, 5.89, None, 6.0, 6.25, 3.56, 7.43, 5.96, 7.27, 6.53, 3.94, 7.01, 4.84, 6.91, 6.53, 4.66, 3.57, 4.84, None, 5.72, 7.73, 2.94, None, 4.51, 6.74, 5.75, 7.01, 7.73, 4.5, 6.45, 4.05, 6.85, 7.36, 4.97, 7.01, 4.46, 5.43, 6.91, 4.0, 4.1, 7.01, 6.74, None, 2.73, 4.96, None, 5.12, 4.98, 5.81, None, 5.66, 5.35, 6.45, 6.79, 4.85, 5.87, 4.2, 4.45, 7.01, 3.75, 3.77, 3.28, 7.73, 5.06, None, 5.69, 7.4, 6.04, 5.2, 3.26, 6.06, 6.71, 6.27, 3.07, 6.7, 7.73, 5.32, 7.4, 7.73, 3.89, 5.02, None, None, 7.73, 4.97, 4.5, 7.43, 4.77, 6.04, 4.41, 5.12, 7.43, 7.36, 4.39, 7.27, 7.73, 4.96, 7.01, 4.61, 7.43, 6.08, 7.01, 5.16, None, 7.73, 4.07, 7.07, 7.01, 6.53, 4.91, 3.81, 6.91, 5.09, 2.56, 7.73, 5.97, 7.01, 6.25, 7.43, 6.08, 3.1, 7.43, 4.42, 5.0, 1.88, None, 5.57, 6.04, 6.7, 7.73, 4.52, None, 6.47, 6.06, 4.46, 7.01, 5.69, 7.36, 5.45, None, 4.97, None, 6.54, 4.85, 5.3, None, 7.73, 3.61, 4.43, 6.27, 5.97, 7.43, 5.76, 4.11, 4.68, 7.4, 4.68, None, 5.43, 4.46, 7.41, 4.68, None, 5.09, 5.33, 3.77, 6.82, 5.24, 6.66, 5.54, 7.73, 5.19, 3.65, None, None, 5.58, None, 6.82, 3.81, 6.91, 5.09, 5.74, 6.69, 4.48, 3.03, 7.73, 4.01, 7.01, 4.49, 7.73, 5.35, 4.85, None, 7.73, 4.85, 4.63, 4.43, 7.01, 5.09, 4.67, 7.01, 4.02, None, 7.73, 5.45, None, 4.97, 7.41, 5.3, 6.52, 5.09, 4.98, 6.29, 7.01, 6.25, 7.43, 5.54, 6.36, 3.79, None, 6.82, 4.38, 5.74, 6.69, 3.03, 7.73, 5.97, 7.01, 4.61, 7.43, 6.08, 7.01, 5.16, None, 7.27, 5.41, None, 5.09, 3.18, 4.32, 7.43, 6.69, 6.12, 4.15, 6.25, 5.19, None, 6.63, 5.98, 5.1, 5.02, 6.25, 5.19, 7.01, 5.96, 6.69, 4.73, 5.69, 5.09, None, 7.01, 5.27, None, 7.73, 3.61, 7.4, 3.49, 6.37, 4.72, 7.36, 5.97, 7.01, 4.72, 4.33, 6.91, 7.36, 4.95, 4.83, None, 6.85, 4.68, 5.23, 4.41, 6.63, 4.19, 4.61, None, 4.95, 4.05, 4.83, 7.36, 4.66, 
3.07, 7.43, 7.73, 4.76, None, 7.41, None, 3.32, 4.82, None, 4.04, 3.66, 6.91, 5.1, 6.37, 5.05, 4.88, None, 5.52, 7.01, 6.74, 6.69, 4.28, 6.91, 3.81, None, 4.62, 4.29, 4.89, 7.43, 2.88, 4.41, 5.12, 5.84, 6.77, 5.57, None, 4.5, 7.43, 6.04, 4.86, 5.19, 4.78, 7.73, 2.89, 3.04, 7.4, 3.33, 4.5, 7.41, 4.54, None, None, 7.27, 4.46, 7.43, 7.73, 4.97, None, 6.54, 6.46, 5.99, 6.1, 6.24, 5.09, 3.18, 7.73, 5.35, 4.22, None, 4.99, 6.24, 7.73, 4.83, 4.56, 7.41, 7.73, 3.67, 7.4, 7.73, 5.06, 6.37, 5.03, 7.01, 6.53, 3.82, 4.85, 7.4, 4.84, None, 2.46, None, 7.36, 5.07, 5.06, 4.0, 7.01, 5.16, 5.43, None, 3.94, 4.91, 7.43, 5.25, 5.39, 4.48, 7.01, 7.36, 6.36, 4.45, 4.9, 5.04, None, 6.82, 4.68, 4.43, 3.64, 7.4, 5.24, 7.01, 6.34, 4.83, 4.9, 5.93, 7.43, 7.73, 5.16, 2.93, None, 6.85, 4.91, 3.39, 4.16, None, 4.41, 6.46, 4.38, 6.63, 7.73, 6.0, 4.28, 4.06, None, 3.9, 3.43, None, 6.47, 7.4, 7.73, 3.91, 7.4, 7.73, 4.62, 2.6, 5.31, 4.95, None, 5.51, 6.51, 4.84, 6.91, 4.72, 5.56, 2.89, 4.51, None, 7.73, 4.31, 4.91, 7.4, 4.42, 7.41, 4.73, 6.37, 3.66, 3.95, None, 2.56, 4.02, 6.63, 5.4, 5.22, 7.43, 4.78, 5.04, None, None, 6.82, 5.86, 5.27, 4.8, 7.73, 6.0, 5.44, 7.27, 6.24, 5.09, 5.49, 4.8, 5.44, 7.43, 7.73, 5.33, 7.4, 4.32, None, 6.66, 4.68, 6.14, 3.81, 6.91, 5.09, None, 4.78, 4.95, 6.46, 6.18, 6.79, 4.49, None, 4.99, 6.24, 7.73, 5.34, 3.65, 5.02, 4.04, 7.73, 5.1, 3.93, 7.4, 3.26, None, 6.82, 4.9, 6.33, 3.17, 7.73, 5.35, 5.08, 5.75, 1.39, 5.61, None, 7.41, 5.54, 6.85, 7.73, 5.09, 7.4, 3.76, None, 5.63, 5.24, 4.61, 3.52, 7.43, 4.92, 7.36, 5.89, 5.63, 7.4, 3.26, None, 5.47, 5.09, None, 4.94, 5.01, 6.34, 4.53, 6.38, None, 7.41, 3.26, 6.82, 3.52, 4.22, 7.41, 3.03, None, None, 5.09, 6.45, 5.76, 5.05, 7.73, 5.35, 5.27, None, 6.63, 7.73, 4.45, 7.4, 5.3, 7.43, 7.73, 4.31, 7.4, 3.02, None, 5.41, 7.73, 5.39, 7.4, 7.73, 5.35, 5.08, None, 6.31, 6.45, 6.79, 6.35, 4.7, 7.43, 7.73, 5.62, 7.4, 5.19, None, 5.79, 6.25, 7.41, 5.75, None, 7.01, 6.04, 7.43, 4.15, None, 6.31, 7.07, 6.35, 5.97, 7.43, 4.13, 7.43, 7.36, 
2.24, 4.58, 7.43, 6.25, 5.74, None, 6.63, 5.32, 4.16, 7.36, 3.79, 3.65, 7.43, 7.73, 4.11, 7.01, 5.09, 4.8, 7.01, 6.25, 3.5, 7.4, 5.35, 4.22, None] + self.assertEqual(doc._.token_freqs,token_freqs) + + def test_mean_token_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.mean_token_frequency,4.649855072463768) + + def test_med_token_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.median_token_frequency,4.83) + + def test_max_token_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_token_frequency,6.29) + + def test_min_token_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_token_frequency,1.39) + + def test_std_token_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_token_frequency,0.84861873536042) + + def test_lemma_freqs(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + lemma_freqs = [7.73, 4.97, 4.99, 5.09, 3.69, 6.85, 5.63, 5.24, 5.61, 6.91, 5.04, 5.35, 5.27, 6.08, 7.73, 5.36, 5.02, None, 4.55, 6.31, 6.71, 6.79, 6.35, 6.29, 7.27, 5.39, 6.0, 7.73, 5.54, 5.89, 7.4, 6.25, 6.71, 5.54, 6.36, 3.95, None, 7.36, 4.98, 4.17, 6.91, 7.36, 4.59, 5.95, 4.43, 6.24, 5.09, 6.71, 2.85, 7.73, 5.89, None, 6.0, 6.25, 3.56, 7.43, 5.96, 7.27, 6.53, 3.94, 7.01, 5.49, 6.91, 6.53, 4.66, 3.57, 4.84, None, 5.72, 7.73, 2.94, None, 5.26, 6.79, 5.75, 7.01, 7.73, 4.5, 6.45, 4.05, 6.85, 7.36, 4.97, 7.01, 5.26, 5.43, 6.91, 4.0, 4.77, 7.01, 6.79, None, 2.85, 4.96, None, 5.12, 5.34, 5.81, None, 5.66, 5.18, 6.45, 6.79, 4.94, 5.87, 4.2, 5.02, 7.01, 3.75, 4.33, 4.03, 7.73, 5.06, None, 5.69, 7.4, 6.04, 5.2, 3.77, 6.06, 6.71, 6.79, 3.07, 6.7, 7.73, 5.32, 7.4, 7.73, 3.89, 5.02, None, None, 7.73, 4.97, 4.85, 7.43, 4.77, 6.04, 4.41, 5.54, 7.43, 7.36, 4.39, 7.27, 7.73, 4.96, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, None, 7.73, 4.07, 6.79, 7.01, 
6.53, 5.08, 3.81, 6.91, 5.09, 2.98, 7.73, 5.97, 7.01, 6.25, 7.43, 6.08, 3.1, 7.43, 4.42, 5.0, 2.6, None, 5.81, 6.04, 6.7, 7.73, 4.52, None, 6.47, 6.06, 4.46, 7.01, 5.69, 7.36, 5.45, None, 4.97, None, 6.54, 4.85, 5.3, None, 7.73, 3.61, 4.43, 6.27, 5.97, 7.43, 5.76, 4.11, 4.8, 7.4, 4.68, None, 5.43, 4.46, 7.41, 4.68, None, 5.09, 5.33, 4.47, 6.82, 5.24, 6.66, 6.08, 7.73, 5.4, 3.65, None, None, 5.58, None, 6.82, 3.81, 6.91, 5.09, 6.35, 6.69, 4.48, 3.03, 7.73, 4.01, 7.01, 5.05, 7.73, 5.35, 2.44, None, 7.73, 4.85, 5.27, 4.43, 7.01, 5.09, 5.01, 7.01, 4.02, None, 7.73, 5.45, None, 4.97, 7.41, 5.3, 6.52, 5.09, 4.98, 6.29, 7.01, 6.25, 7.43, 5.54, 6.36, 3.79, None, 6.82, 4.38, 6.35, 6.69, 3.03, 7.73, 5.97, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, None, 7.27, 5.41, None, 5.09, 5.63, 4.32, 7.43, 6.69, 6.12, 4.15, 6.25, 5.4, None, 6.63, 5.98, 5.1, 5.02, 6.25, 5.23, 7.01, 6.35, 6.69, 4.73, 5.69, 5.09, None, 7.01, 5.27, None, 7.73, 3.61, 7.4, 3.94, 6.71, 4.37, 7.36, 5.97, 7.01, 4.72, 4.33, 6.91, 7.36, 4.95, 4.83, None, 6.85, 5.08, 5.23, 4.84, 6.63, 3.89, 5.29, None, 4.95, 4.05, 5.4, 7.36, 4.66, 3.07, 7.43, 7.73, 4.76, None, 7.41, None, 3.32, 4.82, None, 4.04, 3.66, 6.91, 5.1, 6.71, 5.02, 4.88, None, 5.78, 7.01, 6.79, 6.69, 4.28, 6.91, 3.81, None, 5.01, 4.94, 5.49, 7.43, 2.88, 4.41, 5.57, 5.84, 6.77, 5.35, None, 4.84, 7.43, 6.04, 4.86, 5.4, 4.78, 7.73, 2.89, 4.22, 7.4, 3.33, 4.21, 7.41, 4.16, None, None, 7.27, 4.46, 7.43, 7.73, 4.97, None, 6.54, 6.46, 5.99, 6.1, 6.24, 5.09, 5.63, 7.73, 5.35, 4.22, None, 4.99, 6.24, 7.73, 4.83, 4.56, 7.41, 7.73, 3.67, 7.4, 7.73, 5.06, 6.71, 5.01, 7.01, 6.53, 3.82, 4.85, 7.4, 5.36, None, 2.46, None, 7.36, 5.07, 5.06, 4.0, 7.01, 5.16, 5.43, None, 4.22, 4.77, 7.43, 5.25, 5.39, 3.64, 7.01, 7.36, 6.36, 4.45, 4.9, 5.04, None, 6.82, 4.68, 5.48, 3.77, 7.4, 5.24, 7.01, 6.79, 4.83, 5.36, 5.93, 7.43, 7.73, 5.16, 2.93, None, 6.85, 5.08, 3.39, 4.29, None, 4.41, 6.46, 4.38, 6.63, 7.73, 6.0, 4.28, 4.75, None, 3.9, 3.43, None, 6.47, 7.4, 7.73, 4.08, 7.4, 7.73, 4.62, 
2.6, 5.31, 4.95, None, 5.05, 6.51, 5.36, 6.91, 4.72, 5.56, 2.89, 4.47, None, 7.73, 4.31, 4.91, 7.4, 5.1, 7.41, 4.73, 6.71, 4.12, 3.95, None, 2.56, 4.87, 6.63, 5.4, 5.22, 7.43, 4.78, 5.04, None, None, 6.82, 5.86, 5.27, 5.15, 7.73, 6.0, 5.44, 7.27, 6.24, 5.09, 5.49, 5.15, 5.44, 7.43, 7.73, 5.33, 7.4, 4.32, None, 6.66, 5.08, 6.14, 3.81, 6.91, 5.09, None, 4.78, 5.06, 6.46, 6.18, 6.79, 4.57, None, 4.99, 6.24, 7.73, 5.34, 3.65, 5.02, 4.52, 7.73, 5.1, 3.93, 7.4, 3.26, None, 6.82, 4.9, 6.71, 2.58, 7.73, 5.35, 5.08, 5.75, 1.39, 5.95, None, 7.41, 5.54, 6.85, 7.73, 5.09, 7.4, 4.0, None, 5.63, 5.24, 5.35, 4.31, 7.43, 4.92, 7.36, 5.89, 5.63, 7.4, 3.26, None, 5.81, 5.09, None, 4.94, 5.29, 6.79, 4.81, 6.38, None, 7.41, 3.26, 6.79, 3.52, 4.82, 7.41, 3.29, None, None, 5.09, 6.45, 5.76, 5.05, 7.73, 5.35, 5.27, None, 6.63, 7.73, 4.45, 7.4, 5.3, 7.43, 7.73, 4.31, 7.4, 3.02, None, 5.71, 7.73, 5.39, 7.4, 7.73, 5.35, 5.08, None, 6.31, 6.45, 6.79, 6.35, 4.7, 7.43, 7.73, 5.62, 7.4, 5.4, None, 5.79, 6.25, 7.41, 5.75, None, 7.01, 6.54, 7.43, 4.15, None, 6.31, 6.79, 6.35, 5.97, 7.43, 4.13, 7.43, 7.36, 2.24, 4.58, 7.43, 6.25, 5.72, None, 6.63, 5.32, 4.16, 7.36, 3.79, 3.65, 7.43, 7.73, 4.58, 7.01, 5.09, 5.15, 7.01, 6.25, 4.43, 7.4, 5.35, 4.22, None] + self.assertEqual(doc._.lemma_freqs,lemma_freqs) + + def test_mean_lemma_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.mean_lemma_frequency,4.75159420289855) + + def test_med_lemma_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.median_lemma_frequency,4.97) + + def test_max_lemma_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_lemma_frequency,6.29) + + def test_min_lemma_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_lemma_frequency,1.39) + + def test_std_lemma_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + 
self.assertEqual(doc._.std_lemma_frequency,0.8482944982612167) + + def test_max_freqs(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + max_freqs = [7.73, 4.97, 4.99, 5.09, 3.69, 6.85, 5.63, 5.24, 5.61, 6.91, 5.04, 5.35, 5.27, 6.08, 7.73, 5.36, 5.02, 0.0, 4.55, 6.31, 6.71, 6.79, 6.35, 6.29, 7.27, 5.39, 6.0, 7.73, 5.54, 5.89, 7.4, 6.25, 6.71, 5.54, 6.36, 3.95, 0.0, 7.36, 4.98, 4.17, 6.91, 7.36, 4.59, 5.95, 4.43, 6.24, 5.09, 6.71, 2.85, 7.73, 5.89, 0.0, 6.0, 6.25, 3.56, 7.43, 5.96, 7.27, 6.53, 3.94, 7.01, 5.49, 6.91, 6.53, 4.66, 3.57, 4.84, 0.0, 5.72, 7.73, 2.94, 0.0, 5.26, 6.79, 5.75, 7.01, 7.73, 4.5, 6.45, 4.05, 6.85, 7.36, 4.97, 7.01, 5.26, 5.43, 6.91, 4.0, 4.77, 7.01, 6.79, 3.79, 2.85, 4.96, 0.0, 5.12, 5.34, 5.81, 0.0, 5.66, 5.18, 6.45, 6.79, 4.94, 5.87, 4.2, 5.02, 7.01, 3.75, 4.33, 4.03, 7.73, 5.06, 0.0, 5.69, 7.4, 6.04, 5.2, 3.77, 6.06, 6.71, 6.79, 3.07, 6.7, 7.73, 5.32, 7.4, 7.73, 3.89, 5.02, 0.0, 0.0, 7.73, 4.97, 4.85, 7.43, 4.77, 6.04, 4.41, 5.54, 7.43, 7.36, 4.39, 7.27, 7.73, 4.96, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, 0.0, 7.73, 4.07, 6.79, 7.01, 6.53, 5.08, 3.81, 6.91, 5.09, 2.98, 7.73, 5.97, 7.01, 6.25, 7.43, 6.08, 3.1, 7.43, 4.42, 5.0, 2.6, 0.0, 5.81, 6.04, 6.7, 7.73, 4.52, 0.0, 6.47, 6.06, 4.46, 7.01, 5.69, 7.36, 5.45, 0.0, 4.97, 0.0, 6.54, 4.85, 5.3, 0.0, 7.73, 3.61, 4.43, 6.27, 5.97, 7.43, 5.76, 4.11, 4.8, 7.4, 4.68, 0.0, 5.43, 4.46, 7.41, 4.68, 0.0, 5.09, 5.33, 4.47, 6.82, 5.24, 6.66, 6.08, 7.73, 5.4, 3.65, 0.0, 0.0, 5.58, 0.0, 6.82, 3.81, 6.91, 5.09, 6.35, 6.69, 4.48, 3.03, 7.73, 4.01, 7.01, 5.05, 7.73, 5.35, 2.44, 0.0, 7.73, 4.85, 5.27, 4.43, 7.01, 5.09, 5.01, 7.01, 4.02, 0.0, 7.73, 5.45, 0.0, 4.97, 7.41, 5.3, 6.52, 5.09, 4.98, 6.29, 7.01, 6.25, 7.43, 5.54, 6.36, 3.79, 0.0, 6.82, 4.38, 6.35, 6.69, 3.03, 7.73, 5.97, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, 0.0, 7.27, 5.41, 0.0, 5.09, 5.63, 4.32, 7.43, 6.69, 6.12, 4.15, 6.25, 5.4, 0.0, 6.63, 5.98, 5.1, 5.02, 6.25, 5.23, 7.01, 6.35, 6.69, 4.73, 5.69, 5.09, 0.0, 7.01, 5.27, 0.0, 7.73, 3.61, 
7.4, 3.94, 6.71, 4.37, 7.36, 5.97, 7.01, 4.72, 4.33, 6.91, 7.36, 4.95, 4.83, 0.0, 6.85, 5.08, 5.23, 4.84, 6.63, 3.89, 5.29, 0.0, 4.95, 4.05, 5.4, 7.36, 4.66, 3.07, 7.43, 7.73, 4.76, 0.0, 7.41, 0.0, 3.32, 4.82, 0.0, 4.04, 3.66, 6.91, 5.1, 6.71, 5.02, 4.88, 0.0, 5.78, 7.01, 6.79, 6.69, 4.28, 6.91, 3.81, 0.0, 5.01, 4.94, 5.49, 7.43, 2.88, 4.41, 5.57, 5.84, 6.77, 5.35, 0.0, 4.84, 7.43, 6.04, 4.86, 5.4, 4.78, 7.73, 2.89, 4.22, 7.4, 3.33, 4.21, 7.41, 4.16, 0.0, 0.0, 7.27, 4.46, 7.43, 7.73, 4.97, 0.0, 6.54, 6.46, 5.99, 6.1, 6.24, 5.09, 5.63, 7.73, 5.35, 4.22, 0.0, 4.99, 6.24, 7.73, 4.83, 4.56, 7.41, 7.73, 3.67, 7.4, 7.73, 5.06, 6.71, 5.01, 7.01, 6.53, 3.82, 4.85, 7.4, 5.36, 0.0, 2.46, 0.0, 7.36, 5.07, 5.06, 4.0, 7.01, 5.16, 5.43, 0.0, 4.22, 4.77, 7.43, 5.25, 5.39, 3.64, 7.01, 7.36, 6.36, 4.45, 4.9, 5.04, 0.0, 6.82, 4.68, 5.48, 3.77, 7.4, 5.24, 7.01, 6.79, 4.83, 5.36, 5.93, 7.43, 7.73, 5.16, 2.93, 0.0, 6.85, 5.08, 3.39, 4.29, 0.0, 4.41, 6.46, 4.38, 6.63, 7.73, 6.0, 4.28, 4.75, 0.0, 3.9, 3.43, 0.0, 6.47, 7.4, 7.73, 4.08, 7.4, 7.73, 4.62, 2.6, 5.31, 4.95, 0.0, 5.05, 6.51, 5.36, 6.91, 4.72, 5.56, 2.89, 4.47, 0.0, 7.73, 4.31, 4.91, 7.4, 5.1, 7.41, 4.73, 6.71, 4.12, 3.95, 0.0, 2.56, 4.87, 6.63, 5.4, 5.22, 7.43, 4.78, 5.04, 0.0, 0.0, 6.82, 5.86, 5.27, 5.15, 7.73, 6.0, 5.44, 7.27, 6.24, 5.09, 5.49, 5.15, 5.44, 7.43, 7.73, 5.33, 7.4, 4.32, 0.0, 6.66, 5.08, 6.14, 3.81, 6.91, 5.09, 0.0, 4.78, 5.06, 6.46, 6.18, 6.79, 4.57, 0.0, 4.99, 6.24, 7.73, 5.34, 3.65, 5.02, 4.52, 7.73, 5.1, 3.93, 7.4, 3.26, 0.0, 6.82, 4.9, 6.71, 2.58, 7.73, 5.35, 5.08, 5.75, 1.39, 5.95, 0.0, 7.41, 5.54, 6.85, 7.73, 5.09, 7.4, 4.0, 0.0, 5.63, 5.24, 5.35, 4.31, 7.43, 4.92, 7.36, 5.89, 5.63, 7.4, 3.26, 0.0, 5.81, 5.09, 0.0, 4.94, 5.29, 6.79, 4.81, 6.38, 0.0, 7.41, 3.26, 6.79, 3.52, 4.82, 7.41, 3.29, 0.0, 0.0, 5.09, 6.45, 5.76, 5.05, 7.73, 5.35, 5.27, 0.0, 6.63, 7.73, 4.45, 7.4, 5.3, 7.43, 7.73, 4.31, 7.4, 3.02, 0.0, 5.71, 7.73, 5.39, 7.4, 7.73, 5.35, 5.08, 0.0, 6.31, 6.45, 6.79, 6.35, 4.7, 7.43, 7.73, 5.62, 7.4, 
5.4, 0.0, 5.79, 6.25, 7.41, 5.75, 0.0, 7.01, 6.54, 7.43, 4.15, 0.0, 6.31, 6.79, 6.35, 5.97, 7.43, 4.13, 7.43, 7.36, 2.24, 4.58, 7.43, 6.25, 5.72, 0.0, 6.63, 5.32, 4.16, 7.36, 3.79, 3.65, 7.43, 7.73, 4.58, 7.01, 5.09, 5.15, 7.01, 6.25, 4.43, 7.4, 5.35, 4.22, 0.0] + self.assertEqual(doc._.max_freqs,max_freqs) + + def test_mean_max_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.mean_max_frequency,4.75159420289855) + + def test_med_max_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.median_max_frequency,4.97) + + def test_max_max_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_max_frequency,6.29) + + def test_min_max_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_max_frequency,1.39) + + def test_std_max_frequency(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_max_frequency,0.8482944982612167) + + def test_concretes(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + concretes = [None, 3.6101290323, 4.5504999999999995, 2.727, 2.1228816162, None, 1, 3.7213333333, 4.42225, None, 3.4594, 2.67236875, 4.6227030272, None, None, 3.4244923077, 4.4815135135, None, 2.13169, None, None, None, None, 2.485, None, 3.758, None, None, 4.2983770334999996, 2.719, None, 4.0375862069, None, 3.7058418914, 1.0, 2.9844662465000003, None, None, 3.7257487628, 4.273, None, None, 3.3423284553, 4.6765, 3.912, None, 2.727, None, 3.715627451, None, 4.956, None, 3.324, 4.0375862069, 4.3136091861, None, 4.1723939394, None, None, 6.2751666667, None, 3.4237857726, None, None, 3.3450666667, 5.166, 6.371, None, None, None, 4.75, None, 2.653, None, 2.5534, None, None, 6.0109, None, 3.49005, None, None, 6.331, None, 3.715627451, 4.7225, None, 6.485, 5.8604285714, None, None, None, 5.219, 1, None, None, 2.824, 4.571, None, 5.0, 5.7865, None, 
None, 2.6895, None, 1, 3.781, None, 3.543, 6.029, 3.90935, None, 6.421, None, None, None, None, 2.9277179487, 4.196697479, None, None, None, 2.6968, None, None, 4.7328896984, None, None, 3.2888, 4.4815135135, None, None, None, 3.6101290323, 3.6565000000000003, None, 3.758, None, 3.012, 3.4627777778, None, None, 4.4855972222, None, None, 3.30775, None, 3.394, None, 2.74, None, None, None, None, 3.6101290323, None, None, None, 3.923, 3.059, None, 2.727, 3.936, None, 2.8826666667, None, 4.0375862069, None, 2.74, 2.1318350311, None, 3.118, 4.0028023392, 2.85, None, 3.314, 1, None, None, 4.059, None, None, None, 4.086, None, None, None, 6.2751666667, None, 6.331, None, None, 4.0784, 6.5945, None, None, 2.90060625, 5.5668983051000005, None, 1, None, 4.938, 3.1170739726, 3.0, None, 6.09975, None, 4.7225, 4.514, None, 4.9961666667, None, 2.727, 2.9116, 5.333, None, 3.7213333333, None, 3.90935, None, 3.6731428571, 3.4237009585, None, None, 3.69, None, None, 3.059, None, 2.727, None, None, 1.7887015385, 2.8723333333000003, None, 3.9589, None, 3.0855, None, 2.67236875, 6.343, None, None, 3.7830876481, 4.2752291667, 3.912, None, 2.727, 2.971, None, 3.114, None, None, 6.2751666667, None, 6.331, None, 6.5945, None, 3.7665769231, 2.68816, 2.485, None, 4.0375862069, None, 4.2983770334999996, 1.0, 2.7195516168, None, None, 4.086, None, None, 2.8723333333000003, None, 2.8826666667, None, 3.394, None, 2.74, None, None, None, None, 3.3318333333, None, 2.727, 3.118, 4.059, None, None, 1, 4.5504999999999995, 3.742, 3.6731428571, None, None, None, None, 3.8214958678, 3.742, 3.147, None, None, None, 3.5315, None, 2.727, None, None, 4.2752291667, None, None, 2.471, None, 6.2751666667, None, 3.2952, None, 2.8826666667, None, 5.2524, 2.943, None, None, 3.818, 4.798, None, None, 3.657, 5.2652, 2.9153333333, None, 3.378, 5.75, None, 3.818, 4.412, 3.378, None, 3.344, 2.6968, None, None, 6.072, None, None, None, 5.241, 3.743, None, 2.1816, 3.6151677852, None, 5.0698, None, 3.8214958678, 4.909, 
None, 5.0556666667, None, None, None, 2.8893595993, None, 4.235, None, 2.971, 3.613, 5.324, None, 3.9699999999999998, 3.03, 4.7725879397, 3.0203733333, None, 5.5494545455, None, 3.121, None, None, 2.7252553846, 3.6731428571, 3.1517857143, None, 1, 3.7556666667, None, 2.669742723, 5.4043333333, None, 5.5165, None, None, None, 3.313, None, None, 3.6101290323, None, None, None, 1, 3.7104, None, 2.727, 3.118, None, 2.67236875, 3.7556666667, None, 3.096037037, None, None, 5.063, 3.333, None, None, 4.1698333333, None, None, 5.418, None, 2.971, None, None, 1, 4.8692888889, None, 3.5119111111, None, None, None, None, 2.839, 5.418, 4.0, None, 3.588, 4.7225, None, 2.971, 4.986, None, 4.912, 4.1943333333, 4.5116521739, None, None, 1.0, 1, 5.8991428571, 4.3125, None, None, 4.9231666667, 3.057, 5.2637428928, None, 3.7213333333, None, None, 3.1614336165, 3.8631272727, None, None, None, 3.588, 4.9658318739, None, None, 3.923, 3.665856, 4.462, None, 3.947, None, 4.177, None, None, 2.5534, 2.6578920635000003, 5.985, None, None, None, None, None, None, None, 6.0, None, None, 5.2090425532, 4.4815135135, None, 3.02425, None, 4.089, None, 3.5119111111, None, 4.061, 3.442, 4.3243979592, 3.0, None, None, 3.0724059406, 4.857, None, 3.8985000000000003, None, 5.888, None, 3.677, 3.2069333333, None, None, 4.7305, None, 5.3066707678, 5.311314978, None, None, 4.7443187428, None, None, None, 3.4244923077, 4.2752291667, 4.0125, None, 3.324, 2.455, None, None, 2.727, 2.677, 4.0125, 2.455, None, None, 3.091, None, 4.059, None, None, 3.657, None, 3.059, None, 2.727, None, 2.23728, 3.02425, None, 1, None, 2.6895, None, 3.096037037, None, None, 3.4244923077, 3.2888, 4.4815135135, 3.9726654991, None, 2.8921342281999998, 5.143, None, 5.529, None, None, 5.2455789474, None, 4.3083333333, None, 2.67236875, 4.412, None, 3.4553036269, 4.4815135135, None, None, 1, None, None, 2.727, None, 5.688, None, 1, 3.7213333333, 3.394, 2.448, None, 1.9140000000000001, None, 4.956, 1, None, 5.529, None, 3.433, 2.727, 
None, 4.8551666667, 4.533, None, 3.715627451, None, None, None, 5.529, None, 2.2810043213, 3.9855, None, 4.3683333333, None, None, 2.727, None, 1, 3.0855, None, 2.67236875, 4.6227030272, None, None, None, 2.9779999999999998, None, 6.123, None, None, 4.0626, None, 4.58232, None, 3.5734444443999998, None, 3.758, None, None, 2.67236875, 4.412, None, None, None, None, None, 2.824, None, None, 4.7603333333, None, 3.6731428571, None, None, 3.742, None, 3.2255333333, None, None, None, None, 4.5504999999999995, None, None, 3.1517857143, None, 2.8826666667, None, 3.5, None, None, 5.8155, 2.6575714286, None, 3.742, 4.6522857143, None, None, 2.5, 3.182, None, 1.8708, 4.309, None, None, 3.2032333333, None, 2.727, 4.0125, None, 3.742, 5.375, None, 2.67236875, 3.7556666667, None] + + self.assertEqual(doc._.concretes,concretes) + + def test_mean_concreteness(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.mean_concreteness,3.828875890239528) + + def test_med_concreteness(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.med_concreteness,3.7213333333) + + def test_max_concreteness(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.max_concreteness,6.5945) + + def test_min_concreteness(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.min_concreteness,1.0) + + def test_std_concreteness(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.std_concreteness,1.1503349614987735) + """ + + def test_abstract_traits(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + abstract_traits = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + self.assertEqual(doc._.abstract_traits,abstract_traits) + + def test_propn_abstract_traits(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + 
self.assertEqual(doc._.propn_abstract_traits,0.08948863636363637) + + def test_animates(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + animates = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] + + self.assertEqual(doc._.animates,animates) + + def test_propn_animates(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.propn_animates,0.06392045454545454) + + def test_locations(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + locations = [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, None, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, None, False, False, None, False, False, True, None, False, False, False, False, False, False, False, False, False, False, False, False, False, True, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, None, False, False, False, False, False, False, True, None, False, None, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, 
None, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, None, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, None, False, None, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, None, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, None, False, False, False, False, False, False, False, True, None, False, False, None, False, False, False, False, False, False, False, False, False, 
False, None, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, True, True, False, True, True, None, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, None, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, None, False, False, False, False, False, False, False, None, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, None, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None] + self.assertEqual(doc._.locations,locations) + + def test_propn_locations(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + self.assertEqual(doc._.propn_locations,0.014204545454545454) + + def test_deictics(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + deictics = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] + self.assertEqual(doc._.deictics,deictics) + + def test_propn_deictics(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + 
self.assertEqual(doc._.propn_deictics,0.041193181818181816) + +""" From d9257126616de0f37a45b3db5114dc6dfdd07329 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Thu, 23 May 2024 11:11:10 -0400 Subject: [PATCH 04/39] consolidated AWE_Info declarations for better readability --- awe_workbench/web/parserServer.py | 1375 +++++++++++------------------ 1 file changed, 501 insertions(+), 874 deletions(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index 41b5a5b..3de0b66 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -41,188 +41,189 @@ async def kill(self, websocket): exit() summaryLabels = [ - 'mean_nSyll', - 'med_nSyll', - 'max_nSyll', - 'min_nSyll', - 'std_nSyll', - 'mean_sqnChars', - 'med_sqnChars', - 'max_sqnChars', - 'min_sqnChars', - 'std_sqnChars', - 'propn_latinate', - 'propn_academic', - 'mean_family_size', - 'med_family_size', - 'max_family_size', - 'min_family_size', - 'std_family_size', - 'mean_concreteness', - 'med_concreteness', - 'max_concreteness', - 'min_concreteness', - 'std_concreteness', - 'mean_logNSenses', - 'med_logNSenses', - 'max_logNSenses', - 'min_logNSenses', - 'std_logNSenses', - 'mean_nMorph', - 'med_nMorph', - 'max_nMorph', - 'min_nMorph', - 'std_nMorph', - 'mean_logfreq_HAL', - 'med_logfreq_HAL', - 'max_logfreq_HAL', - 'min_logfreq_HAL', - 'std_logfreq_HAL', - 'mean_root_fam_size', - 'med_root_fam_size', - 'max_root_fam_size', - 'min_root_fam_size', - 'std_root_fam_size', - 'mean_root_pfmf', - 'med_root_pfmf', - 'max_root_pfmf', - 'min_root_pfmf', - 'std_root_pfmf', - 'mean_token_frequency', - 'median_token_frequency', - 'max_token_frequency', - 'min_token_frequency', - 'std_token_frequency', - 'mean_lemma_frequency', - 'median_lemma_frequency', - 'max_lemma_frequency', - 'min_lemma_frequency', - 'std_lemma_frequency', - 'mean_max_frequency', - 'median_max_frequency', - 'max_max_frequency', - 'min_max_frequency', - 'std_max_frequency', - 
'propn_abstract_traits', - 'propn_animates', - 'propn_deictics', - 'wf_type_count', - 'lemma_type_count', - 'type_count', - 'token_count', - 'paragraph_count', - 'mean_paragraph_length', - 'median_paragraph_length', - 'max_paragraph_length', - 'min_paragraph_length', - 'stdev_paragraph_length', - 'propn_transition_words', - 'transition_category_count', - 'transition_word_type_count', - 'mean_transition_distance', - 'median_transition_distance', - 'max_transition_distance', - 'min_transition_distance', - 'stdev_transition_distance', - 'mean_sent_cohesion', - 'median_sent_cohesion', - 'max_sent_cohesion', - 'min_sent_cohesion', - 'stdev_sent_cohesion', - 'mean_slider_cohesion', - 'median_slider_cohesion', - 'max_slider_cohesion', - 'min_slider_cohesion', - 'stdev_slider_cohesion', - 'num_corefs', - 'mean_coref_chain_len', - 'median_coref_chain_len', - 'max_coref_chain_len', - 'min_coref_chain_len', - 'stdev_coref_chain_len', - 'sentence_count', - 'mean_sentence_len', - 'median_sentence_len', - 'max_sentence_len', - 'min_sentence_len', - 'std_sentence_len', - 'mean_words_to_sentence_root', - 'median_words_to_sentence_root', - 'max_words_to_sentence_root', - 'min_words_to_sentence_root', - 'stdev_words_to_sentence_root', - 'meanRhemeDepth', - 'medianRhemeDepth', - 'maxRhemeDepth', - 'minRhemeDepth', - 'stdevRhemeDepth', - 'meanThemeDepth', - 'medianThemeDepth', - 'maxThemeDepth', - 'minThemeDepth', - 'stdevThemeDepth', - 'meanWeightedDepth', - 'medianWeightedDepth', - 'maxWeightedDepth', - 'minWeightedDepth', - 'stdevWeightedDepth', - 'meanWeightedBreadth', - 'medianWeightedBreadth', - 'maxWeightedBreadth', - 'minWeightedBreadth', - 'stdevWeightedBreadth', - 'syntacticVariety', - 'propn_past', - 'propn_argument_words', - 'propn_direct_speech', - 'propn_egocentric', - 'propn_allocentric', - 'mean_subjectivity', - 'median_subjectivity', - 'min_subjectivity', - 'max_subjectivity', - 'stdev_subjectivity', - 'mean_polarity', - 'median_polarity', - 'min_polarity', - 
'max_polarity', - 'stdev_polarity', - 'mean_sentiment', - 'median_sentiment', - 'min_sentiment', - 'max_sentiment', - 'stdev_sentiment', - 'mean_main_cluster_span', - 'median_main_cluster_span', - 'min_main_cluster_span', - 'max_main_cluster_span', - 'stdev_main_cluster_span', - 'propn_devwords', - 'mean_devword_nsyll', - 'median_devword_nsyll', - 'min_devword_nsyll', - 'max_devword_nsyll', - 'stdev_devword_nsyll', - 'mean_devword_nmorph', - 'median_devword_nmorph', - 'min_devword_nmorph', - 'max_devword_nmorph', - 'stdev_devword_nmorph', - 'mean_devword_nsenses', - 'median_devword_nsenses', - 'min_devword_nsenses', - 'max_devword_nsenses', - 'stdev_devword_nsenses', - 'mean_devword_token_freq', - 'median_devword_token_freq', - 'min_devword_token_freq', - 'max_devword_token_freq', - 'stdev_devword_token_freq', - 'mean_devword_concreteness', - 'median_devword_concreteness', - 'min_devword_concreteness', - 'max_devword_concreteness', - 'stdev_devword_concreteness'] + 'mean_nSyll', + 'med_nSyll', + 'max_nSyll', + 'min_nSyll', + 'std_nSyll', + 'mean_sqnChars', + 'med_sqnChars', + 'max_sqnChars', + 'min_sqnChars', + 'std_sqnChars', + 'propn_latinate', + 'propn_academic', + 'mean_family_size', + 'med_family_size', + 'max_family_size', + 'min_family_size', + 'std_family_size', + 'mean_concreteness', + 'med_concreteness', + 'max_concreteness', + 'min_concreteness', + 'std_concreteness', + 'mean_logNSenses', + 'med_logNSenses', + 'max_logNSenses', + 'min_logNSenses', + 'std_logNSenses', + 'mean_nMorph', + 'med_nMorph', + 'max_nMorph', + 'min_nMorph', + 'std_nMorph', + 'mean_logfreq_HAL', + 'med_logfreq_HAL', + 'max_logfreq_HAL', + 'min_logfreq_HAL', + 'std_logfreq_HAL', + 'mean_root_fam_size', + 'med_root_fam_size', + 'max_root_fam_size', + 'min_root_fam_size', + 'std_root_fam_size', + 'mean_root_pfmf', + 'med_root_pfmf', + 'max_root_pfmf', + 'min_root_pfmf', + 'std_root_pfmf', + 'mean_token_frequency', + 'median_token_frequency', + 'max_token_frequency', + 
'min_token_frequency', + 'std_token_frequency', + 'mean_lemma_frequency', + 'median_lemma_frequency', + 'max_lemma_frequency', + 'min_lemma_frequency', + 'std_lemma_frequency', + 'mean_max_frequency', + 'median_max_frequency', + 'max_max_frequency', + 'min_max_frequency', + 'std_max_frequency', + 'propn_abstract_traits', + 'propn_animates', + 'propn_deictics', + 'wf_type_count', + 'lemma_type_count', + 'type_count', + 'token_count', + 'paragraph_count', + 'mean_paragraph_length', + 'median_paragraph_length', + 'max_paragraph_length', + 'min_paragraph_length', + 'stdev_paragraph_length', + 'propn_transition_words', + 'transition_category_count', + 'transition_word_type_count', + 'mean_transition_distance', + 'median_transition_distance', + 'max_transition_distance', + 'min_transition_distance', + 'stdev_transition_distance', + 'mean_sent_cohesion', + 'median_sent_cohesion', + 'max_sent_cohesion', + 'min_sent_cohesion', + 'stdev_sent_cohesion', + 'mean_slider_cohesion', + 'median_slider_cohesion', + 'max_slider_cohesion', + 'min_slider_cohesion', + 'stdev_slider_cohesion', + 'num_corefs', + 'mean_coref_chain_len', + 'median_coref_chain_len', + 'max_coref_chain_len', + 'min_coref_chain_len', + 'stdev_coref_chain_len', + 'sentence_count', + 'mean_sentence_len', + 'median_sentence_len', + 'max_sentence_len', + 'min_sentence_len', + 'std_sentence_len', + 'mean_words_to_sentence_root', + 'median_words_to_sentence_root', + 'max_words_to_sentence_root', + 'min_words_to_sentence_root', + 'stdev_words_to_sentence_root', + 'meanRhemeDepth', + 'medianRhemeDepth', + 'maxRhemeDepth', + 'minRhemeDepth', + 'stdevRhemeDepth', + 'meanThemeDepth', + 'medianThemeDepth', + 'maxThemeDepth', + 'minThemeDepth', + 'stdevThemeDepth', + 'meanWeightedDepth', + 'medianWeightedDepth', + 'maxWeightedDepth', + 'minWeightedDepth', + 'stdevWeightedDepth', + 'meanWeightedBreadth', + 'medianWeightedBreadth', + 'maxWeightedBreadth', + 'minWeightedBreadth', + 'stdevWeightedBreadth', + 
'syntacticVariety', + 'propn_past', + 'propn_argument_words', + 'propn_direct_speech', + 'propn_egocentric', + 'propn_allocentric', + 'mean_subjectivity', + 'median_subjectivity', + 'min_subjectivity', + 'max_subjectivity', + 'stdev_subjectivity', + 'mean_polarity', + 'median_polarity', + 'min_polarity', + 'max_polarity', + 'stdev_polarity', + 'mean_sentiment', + 'median_sentiment', + 'min_sentiment', + 'max_sentiment', + 'stdev_sentiment', + 'mean_main_cluster_span', + 'median_main_cluster_span', + 'min_main_cluster_span', + 'max_main_cluster_span', + 'stdev_main_cluster_span', + 'propn_devwords', + 'mean_devword_nsyll', + 'median_devword_nsyll', + 'min_devword_nsyll', + 'max_devword_nsyll', + 'stdev_devword_nsyll', + 'mean_devword_nmorph', + 'median_devword_nmorph', + 'min_devword_nmorph', + 'max_devword_nmorph', + 'stdev_devword_nmorph', + 'mean_devword_nsenses', + 'median_devword_nsenses', + 'min_devword_nsenses', + 'max_devword_nsenses', + 'stdev_devword_nsenses', + 'mean_devword_token_freq', + 'median_devword_token_freq', + 'min_devword_token_freq', + 'max_devword_token_freq', + 'stdev_devword_token_freq', + 'mean_devword_concreteness', + 'median_devword_concreteness', + 'min_devword_concreteness', + 'max_devword_concreteness', + 'stdev_devword_concreteness' + ] async def run_parser(self, websocket, path): current_doc = '' @@ -347,15 +348,13 @@ async def run_parser(self, websocket, path): indic = messagelist[2] itype = messagelist[3] await websocket.send( - doc._.AWE_Info(indicator=indic, - infoType=itype)) + doc._.AWE_Info(indicator=indic,infoType=itype)) elif len(messagelist) == 5: indic = messagelist[2] itype = messagelist[3] summ = messagelist[4] - result = doc._.AWE_Info(indicator=indic, - infoType=itype, - summaryType=summ) + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ) if type(result) in [int, float, bool]: await websocket.send(str(result)) else: @@ -366,10 +365,8 @@ async def run_parser(self, websocket, path): itype = 
messagelist[3] summ = messagelist[4] filt = json.loads(messagelist[5]) - result = doc._.AWE_Info(indicator=indic, - infoType=itype, - summaryType=summ, - filters=filt) + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt) if type(result) in [int, float]: await websocket.send(str(result)) else: @@ -380,11 +377,8 @@ async def run_parser(self, websocket, path): summ = messagelist[4] filt = json.loads(messagelist[5]) trans = json.loads(messagelist[6]) - result = doc._.AWE_Info(indicator=indic, - infoType=itype, - summaryType=summ, - filters=filt, - transformations=trans) + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt,transformations=trans) if type(result) in [int, float]: await websocket.send(str(result)) else: @@ -455,24 +449,29 @@ async def run_parser(self, websocket, path): command = 'LEMMAS' label = messagelist[1] doc = self.parser.get_document(label) - await websocket.send(doc._.AWE_Info(indicator='lemma_')) + await websocket.send( + doc._.AWE_Info(indicator='lemma_') + ) elif messagelist[0] == 'STOPWORDS': label = messagelist[1] doc = self.parser.get_document(label) - await websocket.send(doc._.AWE_Info(indicator='is_stop')) + await websocket.send( + doc._.AWE_Info(indicator='is_stop') + ) elif messagelist[0] == 'WORDTYPES': command = 'WORDTYPES' label = messagelist[1] doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.AWE_Info( - indicator='lower_',filters=[('is_alpha', ['True']), - ('is_stop', ['False'])],summaryType = 'uniq'))) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='lower_',filters=[('is_alpha', ['True']),('is_stop', ['False'])],summaryType = 'uniq') + )) elif messagelist[0] == 'ROOTS': command = 'ROOTS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root'))) + doc._.AWE_Info(indicator='root') + )) elif messagelist[0] == 'SYLLABLES': command = 'SYLLABLES' 
label = messagelist[1] @@ -484,23 +483,22 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True'])], \ - transformations=['len', 'sqrt']))) + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt']) + )) elif messagelist[0] == 'LATINATES': command = 'LATINATES' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='is_latinate', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'ACADEMICS': command = 'ACADEMICS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='is_academic', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'SENSENUMS': command = 'SENSENUMS' # Position in the list returned equals position @@ -508,44 +506,43 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSenses', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'LOGSENSENUMS': command = 'LOGSENSENUMS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSenses', - filters=[('is_alpha', ['True'])], - transformations=['log']))) + doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])],transformations=['log']) + )) elif messagelist[0] == 'MORPHOLOGY': command = 'MORPHOLOGY' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - 
doc._.AWE_Info(indicator='morphology'))) + doc._.AWE_Info(indicator='morphology') + )) elif messagelist[0] == 'MORPHNUMS': command = 'MORPHNUMS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nMorph', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='nMorph',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'HALROOTFREQS': command = 'HALROOTFREQS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='min_root_freq', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'HALLOGROOTFREQS': command = 'HALLOGROOTFREQS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='min_root_freq', - filters=[('is_alpha', ['True'])], - transformations=['log']))) + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log']) + )) elif messagelist[0] == 'ROOTFAMSIZES': command = 'ROOTFAMSIZES' # Position in the list returned equals position @@ -553,8 +550,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_famSize', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'ROOTPFMFS': command = 'ROOTPFMFS' # Position in the list returned equals position @@ -562,8 +559,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_pfmf', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'FAMILYSIZES': command = 
'FAMILYSIZES' # Position in the list returned equals position @@ -571,8 +568,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='family_size', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='family_size',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'TOKFREQS': command = 'TOKFREQS' # Position in the list returned equals position @@ -580,8 +577,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='token_freq', - filters=[('is_alpha', ['True'])]))) + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])]) + )) elif messagelist[0] == 'LEMMAFREQS': command = 'LEMMAfREQS' # Position in the list returned equals position @@ -658,8 +655,8 @@ async def run_parser(self, websocket, path): doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='delimiter_\n'))) + doc._.AWE_Info(infoType="Doc",indicator='delimiter_n') + )) # doc._.paragraph_breaks)) elif messagelist[0] == 'SENTENCES': command = 'SENTENCES' @@ -669,8 +666,8 @@ async def run_parser(self, websocket, path): doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='sents'))) + doc._.AWE_Info(infoType="Doc",indicator='sents') + )) #await websocket.send(json.dumps( # [(sent.start, sent.end) for sent in doc.sents])) elif messagelist[0] == 'PARAGRAPHLENS': @@ -680,9 +677,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['tokenlen']))) + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen']) + )) elif messagelist[0] == 
'TRANSITIONPROFILE': command = 'TRANSITIONPROFILE' # Returns a rich data structure in a list containing @@ -703,8 +699,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc', - indicator='transitions'))) + doc._.AWE_Info(infoType='Doc',indicator='transitions') + )) elif messagelist[0] == 'TRANSITIONDISTANCES': command = 'TRANSITIONDISTANCES' # List of cosine distances between ten-word windows @@ -712,8 +708,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc', - indicator='transition_distances'))) + doc._.AWE_Info(infoType='Doc',indicator='transition_distances') + )) elif messagelist[0] == 'SENTENCECOHESIONS': command = 'SENTENCECOHESIONS' # List of cosine distances between ten-word windows @@ -721,8 +717,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc', - indicator='intersentence_cohesions'))) + doc._.AWE_Info(infoType='Doc',indicator='intersentence_cohesions') + )) elif messagelist[0] == 'SLIDERCOHESIONS': command = 'SLIDERCOHESIONS' # List of cosine distances between ten-word windows @@ -730,8 +726,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc', - indicator='sliding_window_cohesions'))) + doc._.AWE_Info(infoType='Doc',indicator='sliding_window_cohesions') + )) elif messagelist[0] == 'COREFCHAINS': command = 'COREFCHAINS' # List of coreference chains found in document @@ -747,8 +743,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc', - 
indicator='syntacticDepthsOfRhemes'))) + doc._.AWE_Info(infoType='Doc',indicator='syntacticDepthsOfRhemes') + )) elif messagelist[0] == 'THEMEDEPTHS': command = 'THEMEDEPTHS' # Syntactic depth of the sentence theme -- part @@ -757,7 +753,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='syntacticDepthsOfThemes'))) + doc._.AWE_Info(indicator='syntacticDepthsOfThemes') + )) elif messagelist[0] == 'WEIGHTEDDEPTHS': command = 'WEIGHTEDDEPTHS' # Syntactic depth weighted to penalize @@ -766,7 +763,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='weightedSyntacticDepths'))) + doc._.AWE_Info(indicator='weightedSyntacticDepths') + )) elif messagelist[0] == 'WEIGHTEDBREADTHS': command = 'WEIGHTEDBREADTHS' # Syntactic breadth -- measure of extent to which sentence @@ -776,7 +774,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='weightedSyntacticBreadths'))) + doc._.AWE_Info(indicator='weightedSyntacticBreadths') + )) elif messagelist[0] == 'SENTENCETYPES': # tuple giving number and location of sentence types # format: @@ -787,8 +786,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc', - indicator='sentence_types'))) + doc._.AWE_Info(infoType='Doc',indicator='sentence_types') + )) elif messagelist[0] == 'SYNTACTICPROFILE': command = 'SYNTACTICPROFILE' # Returns a dictionary containing frequency information @@ -817,7 +816,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - 
doc._.AWE_Info(indicator='vwp_quoted'))) + doc._.AWE_Info(indicator='vwp_quoted') + )) elif messagelist[0] == 'DIRECTSPEECHSPANS': command = 'DIRECTSPEECHSPANS' # Data about subset of quoted text -- specifically, @@ -845,8 +845,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_direct_speech'))) + doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech') + )) elif messagelist[0] == 'IN_DIRECT_SPEECH': # 1 for tokens within quoted stretches of direct speech, # 0 for other text. Position in the list corresponds to @@ -854,7 +854,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_in_direct_speech'))) + doc._.AWE_Info(indicator='vwp_in_direct_speech') + )) elif messagelist[0] == 'TENSECHANGES': # list of positions where tense changed in the main # document flow (not in direct speech/quotations, @@ -869,39 +870,43 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_perspective'))) + doc._.AWE_Info(indicator='vwp_perspective') + )) elif messagelist[0] == 'ATTRIBUTIONS': # list of positions where attribution is indicated label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_attribution'))) + doc._.AWE_Info(indicator='vwp_attribution') + )) elif messagelist[0] == 'SOURCES': # list of positions where source is indicated label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_source'))) + doc._.AWE_Info(indicator='vwp_source') + )) elif messagelist[0] == 'CITES': # list of positions where source is indicated label = messagelist[1] doc = 
self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_cite'))) + doc._.AWE_Info(indicator='vwp_cite') + )) elif messagelist[0] == 'STATEMENTSOFFACT': # list of positions where source is indicated label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_statements_of_fact'))) + doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_fact') + )) elif messagelist[0] == 'STATEMENTSOFOPINION': # list of positions where source is indicated label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_statements_of_opinion'))) + doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_opinion') + )) elif messagelist[0] == 'PERSPECTIVESPANS': command = 'PERSPECTIVESPANS' label = messagelist[1] @@ -909,50 +914,54 @@ async def run_parser(self, websocket, path): await websocket.send(json.dumps( doc._.vwp_perspective_spans)) await websocket.send( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_perspective_spans')) + doc._.AWE_Info(infoType="Doc",indicator='vwp_perspective_spans') + ) elif messagelist[0] == 'STANCEMARKERS': command = 'STANCEMARKERS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps(doc._.vwp_stance_markers)) await websocket.send( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_stance_markers')) + doc._.AWE_Info(infoType="Doc",indicator='vwp_stance_markers') + ) elif messagelist[0] == 'CLAIMTEXTS': label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_claim'))) + doc._.AWE_Info(indicator='vwp_claim') + )) elif messagelist[0] == 'DISCUSSIONTEXTS': label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_discussion'))) + doc._.AWE_Info(indicator='vwp_discussion') + 
)) elif messagelist[0] == 'EMOTIONWORDS': command = 'EMOTIONWORDS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_emotionword'))) + doc._.AWE_Info(indicator='vwp_emotionword') + )) elif messagelist[0] == 'CHARACTERWORDS': command = 'CHARACTERWORDS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_character_traits'))) + doc._.AWE_Info(indicator='vwp_character_traits') + )) elif messagelist[0] == 'EMOTIONALSTATES': command = 'EMOTIONALSTATES' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_emotion_states')) + doc._.AWE_Info(infoType="Doc",indicator='vwp_emotion_states') + ) elif messagelist[0] == 'CHARACTERTRAITS': command = 'CHARACTERTRAITS' label = messagelist[1] @@ -964,68 +973,77 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_propositional_attitudes'))) + doc._.AWE_Info(infoType="Doc",indicator='vwp_propositional_attitudes') + )) elif messagelist[0] == 'SOCIAL_AWARENESS': command = 'SOCIAL_AWARENESS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc", - indicator='vwp_social_awareness'))) + doc._.AWE_Info(infoType="Doc",indicator='vwp_social_awareness') + )) elif messagelist[0] == 'CONCRETEDETAILS': command = 'CONCRETEDETAILS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send( - doc._.AWE_Info(indicator='concrete_detail')) + doc._.AWE_Info(indicator='concrete_detail') + ) elif messagelist[0] == 'INTERACTIVELANGUAGE': command = 'INTERACTIVELANGUAGE' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - 
doc._.AWE_Info(indicator='vwp_interactive'))) + doc._.AWE_Info(indicator='vwp_interactive') + )) elif messagelist[0] == 'ARGUMENTWORDS': command = 'ARGUMENTWORDS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_argumentword'))) + doc._.AWE_Info(indicator='vwp_argumentword') + )) elif messagelist[0] == 'ARGUMENTLANGUAGE': command = 'ARGUMENTLANGUAGE' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_argumentation'))) + doc._.AWE_Info(indicator='vwp_argumentation') + )) elif messagelist[0] == 'EXPLICITARGUMENTWORDS': label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_explicit_argument'))) + doc._.AWE_Info(indicator='vwp_explicit_argument') + )) elif messagelist[0] == 'SUBJECTIVITYRATINGS': command = 'SUBJECTIVITYRATINGS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='subjectivity'))) + doc._.AWE_Info(indicator='subjectivity') + )) elif messagelist[0] == 'SENTIMENTRATINGS': command = 'SENTIMENTRATINGS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_sentiment'))) + doc._.AWE_Info(indicator='vwp_sentiment') + )) elif messagelist[0] == 'TONERATINGS': command = 'TONERATINGS2' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_tone'))) + doc._.AWE_Info(indicator='vwp_tone') + )) elif messagelist[0] == 'POLARITYRATINGS': command = 'POLARITYRATINGS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='polarity'))) + doc._.AWE_Info(indicator='polarity') + )) elif messagelist[0] == 'ASSESSMENTS': command = 'ASSESSMENTS' label = messagelist[1] @@ -1036,18 
+1054,21 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='in_past_tense_scope'))) + doc._.AWE_Info(indicator='in_past_tense_scope') + )) elif messagelist[0] == 'GOVERNINGSUBJECTS': command = 'GOVERNINGSUBJECTS' label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='governing_subject'))) + doc._.AWE_Info(indicator='governing_subject') + )) elif messagelist[0] == 'CLUSTERS': label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='clusterID'))) + doc._.AWE_Info(indicator='clusterID') + )) elif messagelist[0] == 'PROMPTLANGUAGE': label = messagelist[1] doc = self.parser.get_document(label) @@ -1060,20 +1081,20 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send( - doc._.AWE_Info(infoType="Doc", - indicator='main_ideas')) + doc._.AWE_Info(infoType="Doc",indicator='main_ideas') + ) elif messagelist[0] == 'SUPPORTINGIDEAS': label = messagelist[1] doc = self.parser.get_document(label) await websocket.send( - doc._.AWE_Info(infoType="Doc", - indicator='supporting_ideas')) + doc._.AWE_Info(infoType="Doc",indicator='supporting_ideas') + ) elif messagelist[0] == 'SUPPORTINGDETAILS': label = messagelist[1] doc = self.parser.get_document(label) await websocket.send( - doc._.AWE_Info(infoType="Doc", - indicator='supporting_details')) + doc._.AWE_Info(infoType="Doc",indicator='supporting_details') + ) elif messagelist[0] == 'CLUSTERINFO': command = 'CLUSTERINFO' # Get the local word clusters our algorithm has @@ -1097,7 +1118,8 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) await websocket.send(json.dumps( - doc._.AWE_Info(indicator='devword'))) + doc._.AWE_Info(indicator='devword') + )) elif 
messagelist[0] == 'NOMINALREFERENCES': command = 'NOMINALREFERENCES' # offset of the logical subject that governs @@ -1113,584 +1135,189 @@ async def run_parser(self, websocket, path): label = messagelist[1] doc = self.parser.get_document(label) summaryFeats = [ - doc._.AWE_Info(indicator='nSyll', - summaryType="mean"), - doc._.AWE_Info(indicator='nSyll', - summaryType="median"), - doc._.AWE_Info(indicator='nSyll', - summaryType="max"), - doc._.AWE_Info(indicator='nSyll', - summaryType="min"), - doc._.AWE_Info(indicator='nSyll', - summaryType="stdev"), - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True'])], \ - transformations=['len', 'sqrt'], \ - summaryType='mean'), - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True'])], \ - transformations=['len', 'sqrt'], \ - summaryType='median'), - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True'])], \ - transformations=['len', 'sqrt'], \ - summaryType='max'), - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True'])], \ - transformations=['len', 'sqrt'], \ - summaryType='min'), - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True'])], \ - transformations=['len', 'sqrt'], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='is_latinate', - filters=[('is_alpha', ['True'])], \ - summaryType="proportion"), - doc._.AWE_Info(indicator='is_academic', - filters=[('is_alpha', ['True'])], \ - summaryType="proportion"), - doc._.AWE_Info(indicator='family_size', \ - filters=[('is_alpha', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='family_size', \ - filters=[('is_alpha', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='family_size', \ - filters=[('is_alpha', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='family_size', \ - filters=[('is_alpha', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='family_size', \ - filters=[('is_alpha', ['True'])], \ - summaryType='stdev'), - 
doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True'])], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True'])], \ - transformations=['log'], \ - summaryType='mean'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True'])], \ - transformations=['log'], \ - summaryType='median'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True'])], \ - transformations=['log'], \ - summaryType='max'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True'])], \ - transformations=['log'], \ - summaryType='min'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True'])], \ - transformations=['log'], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True'])], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='min_root_freq', - filters=[('is_alpha', ['True'])], - transformations=['log'], - summaryType='mean'), - doc._.AWE_Info(indicator='min_root_freq', - filters=[('is_alpha', ['True'])], - transformations=['log'], - summaryType='median'), - doc._.AWE_Info(indicator='min_root_freq', - 
filters=[('is_alpha', ['True'])], - transformations=['log'], - summaryType='max'), - doc._.AWE_Info(indicator='min_root_freq', - filters=[('is_alpha', ['True'])], - transformations=['log'], - summaryType='min'), - doc._.AWE_Info(indicator='min_root_freq', - filters=[('is_alpha', ['True'])], - transformations=['log'], - summaryType='stdev'), - doc._.AWE_Info(indicator='root_famSize', - filters=[('is_alpha', ['True'])], - summaryType='mean'), - doc._.AWE_Info(indicator='root_famSize', - filters=[('is_alpha', ['True'])], - summaryType='median'), - doc._.AWE_Info(indicator='root_famSize', - filters=[('is_alpha', ['True'])], - summaryType='max'), - doc._.AWE_Info(indicator='root_famSize', - filters=[('is_alpha', ['True'])], - summaryType='min'), - doc._.AWE_Info(indicator='root_famSize', - filters=[('is_alpha', ['True'])], - summaryType='stdev'), - doc._.AWE_Info(indicator='root_pfmf', - filters=[('is_alpha', ['True'])], - summaryType='mean'), - doc._.AWE_Info(indicator='root_pfmf', - filters=[('is_alpha', ['True'])], - summaryType='median'), - doc._.AWE_Info(indicator='root_pfmf', - filters=[('is_alpha', ['True'])], - summaryType='max'), - doc._.AWE_Info(indicator='root_pfmf', - filters=[('is_alpha', ['True'])], - summaryType='min'), - doc._.AWE_Info(indicator='root_pfmf', - filters=[('is_alpha', ['True'])], - summaryType='stdev'), - doc._.AWE_Info(indicator='token_freq', - filters=[('is_alpha', ['True'])], - summaryType='mean'), - doc._.AWE_Info(indicator='token_freq', - filters=[('is_alpha', ['True'])], - summaryType='median'), - doc._.AWE_Info(indicator='token_freq', - filters=[('is_alpha', ['True'])], - summaryType='max'), - doc._.AWE_Info(indicator='token_freq', - filters=[('is_alpha', ['True'])], - summaryType='min'), - doc._.AWE_Info(indicator='token_freq', - filters=[('is_alpha', ['True'])], - summaryType='stdev'), - doc._.AWE_Info(indicator='lemma_freq', - filters=[('is_alpha', ['True'])], - summaryType='mean'), - doc._.AWE_Info(indicator='lemma_freq', - 
filters=[('is_alpha', ['True'])], - summaryType='median'), - doc._.AWE_Info(indicator='lemma_freq', - filters=[('is_alpha', ['True'])], - summaryType='max'), - doc._.AWE_Info(indicator='lemma_freq', - filters=[('is_alpha', ['True'])], - summaryType='min'), - doc._.AWE_Info(indicator='lemma_freq', - filters=[('is_alpha', ['True'])], - summaryType='stdev'), - doc._.AWE_Info(indicator='max_freq', - summaryType='mean'), - doc._.AWE_Info(indicator='max_freq', - summaryType='median'), - doc._.AWE_Info(indicator='max_freq', - summaryType='max'), - doc._.AWE_Info(indicator='max_freq', - summaryType='min'), - doc._.AWE_Info(indicator='max_freq', - summaryType='stdev'), - doc._.AWE_Info(indicator='abstract_trait', - filters=[('is_alpha', ['True'])], \ - summaryType="proportion"), - doc._.AWE_Info(indicator='animate', - filters=[('is_alpha', ['True'])], \ - summaryType="proportion"), - doc._.AWE_Info(indicator='deictic', - filters=[('is_alpha', ['True'])], \ - summaryType="proportion"), - doc._.AWE_Info(indicator='root', \ - filters=[('is_alpha', ['True']), - ('is_stop', ['False']), - ('pos_', content_pos)], \ - summaryType = 'total'), - doc._.AWE_Info(indicator='lemma_', \ - filters=[('is_alpha', ['True']), - ('is_stop', ['False']), - ('pos_', content_pos)], \ - summaryType = 'total'), - doc._.AWE_Info(indicator='lower_', \ - filters=[('is_alpha', ['True']), - ('is_stop', ['False']), - ('pos_', content_pos)], \ - summaryType = 'total'), - doc._.AWE_Info(indicator='text', \ - filters=[('is_alpha', ['True']), - ('is_stop', ['False']), - ('pos_', content_pos)], \ - summaryType = 'total'), - doc._.AWE_Info(infoType="Doc", - indicator='delimiter_\n', - summaryType='total'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['tokenlen'], - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['tokenlen'], - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['tokenlen'], - 
summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['tokenlen'], - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['tokenlen'], - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='transitions', - summaryType='proportion'), - doc._.AWE_Info(infoType="Doc", - indicator='transitions', - summaryType='total'), - doc._.AWE_Info(infoType="Doc", - indicator='transitions', - transformations=['text'], - summaryType='counts'), - doc._.AWE_Info(infoType="Doc", - indicator='transition_distances', - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='transition_distances', - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='transition_distances', - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='transition_distances', - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='transition_distances', - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='intersentence_cohesions', - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='intersentence_cohesions', - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='intersentence_cohesions', - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='intersentence_cohesions', - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='intersentence_cohesions', - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='sliding_window_cohesions', - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='sliding_window_cohesions', - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='sliding_window_cohesions', - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='sliding_window_cohesions', - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='sliding_window_cohesions', - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - 
indicator='corefChainInfo', - summaryType='counts'), - doc._.AWE_Info(infoType="Doc", - indicator='corefChainInfo', - transformations=['len'], - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='corefChainInfo', - transformations=['len'], - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='corefChainInfo', - transformations=['len'], - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='corefChainInfo', - transformations=['len'], - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='corefChainInfo', - transformations=['len'], - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - summaryType='counts'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['len'], - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['len'], - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['len'], - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['len'], - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='sents', - transformations=['len'], - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='sentenceThemes', - transformations=['tokenlen'], - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='sentenceThemes', - transformations=['tokenlen'], - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='sentenceThemes', - transformations=['tokenlen'], - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='sentenceThemes', - transformations=['tokenlen'], - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='sentenceThemes', - transformations=['tokenlen'], - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfRhemes', - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfRhemes', - 
summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfRhemes', - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfRhemes', - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfRhemes', - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfThemes', - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfThemes', - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfThemes', - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfThemes', - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='syntacticDepthsOfThemes', - summaryType='stdev'), - doc._.AWE_Info(indicator='weightedSyntacticDepth', - summaryType='mean'), - doc._.AWE_Info(indicator='weightedSyntacticDepth', - summaryType='median'), - doc._.AWE_Info(indicator='weightedSyntacticDepth', - summaryType='max'), - doc._.AWE_Info(indicator='weightedSyntacticDepth', - summaryType='min'), - doc._.AWE_Info(indicator='weightedSyntacticDepth', - summaryType='stdev'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth', - summaryType='mean'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth', - summaryType='median'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth', - summaryType='max'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth', - summaryType='min'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth', - summaryType='stdev'), + doc._.AWE_Info(indicator='nSyll',summaryType="mean"), + doc._.AWE_Info(indicator='nSyll',summaryType="median"), + doc._.AWE_Info(indicator='nSyll',summaryType="max"), + doc._.AWE_Info(indicator='nSyll',summaryType="min"), + doc._.AWE_Info(indicator='nSyll',summaryType="stdev"), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='mean'), + doc._.AWE_Info(indicator='text', 
filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='median'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='max'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='min'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='stdev'), + doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='mean'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='median'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='max'), + 
doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='min'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='stdev'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='mean'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='median'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='max'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='min'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='stdev'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='median'), + 
doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='max_freq',summaryType='mean'), + doc._.AWE_Info(indicator='max_freq',summaryType='median'), + doc._.AWE_Info(indicator='max_freq',summaryType='max'), + doc._.AWE_Info(indicator='max_freq',summaryType='min'), + doc._.AWE_Info(indicator='max_freq',summaryType='stdev'), + doc._.AWE_Info(indicator='abstract_trait',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='animate',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='deictic',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='root', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='lemma_', 
filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='lower_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(infoType="Doc",indicator='delimiter_n',summaryType='total'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='proportion'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='total'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',transformations=['text'],summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='max'), + 
doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sents',summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='median'), 
+ doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='stdev'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='mean'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='median'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='max'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='min'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='stdev'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='mean'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='median'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='max'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='min'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='stdev'), doc._.syntacticVariety, - 
doc._.AWE_Info(indicator='in_past_tense_scope', - summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_argumentation', - summaryType='proportion'), - doc._.AWE_Info(infoType="Doc", - indicator='vwp_direct_speech', - summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_egocentric', - summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_allocentric', - summaryType='proportion'), - doc._.AWE_Info(indicator='subjectivity', - summaryType='mean'), - doc._.AWE_Info(indicator='subjectivity', - summaryType='median'), - doc._.AWE_Info(indicator='subjectivity', - summaryType='min'), - doc._.AWE_Info(indicator='subjectivity', - summaryType='max'), - doc._.AWE_Info(indicator='subjectivity', - summaryType='stdev'), - doc._.AWE_Info(indicator='polarity', - summaryType='mean'), - doc._.AWE_Info(indicator='polarity', - summaryType='median'), - doc._.AWE_Info(indicator='polarity', - summaryType='min'), - doc._.AWE_Info(indicator='polarity', - summaryType='max'), - doc._.AWE_Info(indicator='polarity', - summaryType='stdev'), - doc._.AWE_Info(indicator='vwp_sentiment', - summaryType='mean'), - doc._.AWE_Info(indicator='vwp_sentiment', - summaryType='median'), - doc._.AWE_Info(indicator='vwp_sentiment', - summaryType='min'), - doc._.AWE_Info(indicator='vwp_sentiment', - summaryType='max'), - doc._.AWE_Info(indicator='vwp_sentiment', - summaryType='stdev'), - doc._.AWE_Info(infoType="Doc", - indicator='main_cluster_spans', - transformations=['len'], - summaryType='mean'), - doc._.AWE_Info(infoType="Doc", - indicator='main_cluster_spans', - transformations=['len'], - summaryType='median'), - doc._.AWE_Info(infoType="Doc", - indicator='main_cluster_spans', - transformations=['len'], - summaryType='min'), - doc._.AWE_Info(infoType="Doc", - indicator='main_cluster_spans', - transformations=['len'], - summaryType='max'), - doc._.AWE_Info(infoType="Doc", - indicator='main_cluster_spans', - transformations=['len'], - summaryType='stdev'), - doc._.AWE_Info(indicator='devword', 
\ - summaryType='proportion'), - doc._.AWE_Info(indicator='nSyll', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='nSyll', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='nSyll', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='nSyll', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='nSyll', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='nMorph', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='nSenses', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='token_freq', \ - filters=[('is_alpha', ['True']), - 
('devword', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='token_freq', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='token_freq', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='token_freq', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='token_freq', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='stdev'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='mean'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='median'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='min'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='max'), - doc._.AWE_Info(indicator='concreteness', \ - filters=[('is_alpha', ['True']), - ('devword', ['True'])], \ - summaryType='stdev')] + doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_argumentation',summaryType='proportion'), + doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_egocentric',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_allocentric',summaryType='proportion'), + doc._.AWE_Info(indicator='subjectivity',summaryType='mean'), + doc._.AWE_Info(indicator='subjectivity',summaryType='median'), + doc._.AWE_Info(indicator='subjectivity',summaryType='min'), + doc._.AWE_Info(indicator='subjectivity',summaryType='max'), + doc._.AWE_Info(indicator='subjectivity',summaryType='stdev'), + doc._.AWE_Info(indicator='polarity',summaryType='mean'), + 
doc._.AWE_Info(indicator='polarity',summaryType='median'), + doc._.AWE_Info(indicator='polarity',summaryType='min'), + doc._.AWE_Info(indicator='polarity',summaryType='max'), + doc._.AWE_Info(indicator='polarity',summaryType='stdev'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='mean'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='median'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='min'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='max'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(indicator='devword', summaryType='proportion'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nMorph', 
filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='concreteness', 
filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev') + ] await websocket.send(json.dumps(summaryFeats)) else: await websocket.send(False) From cda6a1afc1cb752e290ae22d68d46ca8450946e1 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Wed, 29 May 2024 08:49:14 -0400 Subject: [PATCH 05/39] Added AWE_NLP tests from WO --- tests/test_awe_nlp.py | 306 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 tests/test_awe_nlp.py diff --git a/tests/test_awe_nlp.py b/tests/test_awe_nlp.py new file mode 100644 index 0000000..c211478 --- /dev/null +++ b/tests/test_awe_nlp.py @@ -0,0 +1,306 @@ +""" +--- [ Test: test_awe_nlp.py ] ----------------------------------------------------------- + +Set of corresponding tests for document features found in awe_nlp.py of writingobserver. + +Author: Caleb Scott (cwscott3@ncsu.edu) + +----------------------------------------------------------------------------------------- +""" + +# --- [ IMPORTS ] ----------------------------------------------------------------------- + +import holmes_extractor.manager as holmes +import unittest +from awe_components.components.utility_functions import print_parse_tree +from awe_workbench.pipeline import pipeline_def + +# --- [ CONSTS/VARS ] ------------------------------------------------------------------- + +holmes_manager = holmes.Manager( + 'en_core_web_lg', + perform_coreference_resolution=False, + number_of_workers=2, + extra_components=pipeline_def +) + +# --- [ SETUP ] ------------------------------------------------------------------------- + +# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses +holmes_manager.parse_and_register_document( + document_text="The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. 
A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. 
Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. 
There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", + label='GRE_Sample_Essay' +) + +# --- [ CLASSES ] ----------------------------------------------------------------------- + +class AWENLPTest(unittest.TestCase): + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='is_academic',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='is_latinate',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + 
print(doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['crossreferential'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total')) + + def test_is_academic(self): + doc = 
holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['summative'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total')) + + def 
test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['VERB'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) + + def test_is_academic(self): + doc = 
holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_source',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_cite',summaryType='percent')) + + def test_is_academic(self): + doc = 
holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='concrete_details',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + 
print(doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='vwp_character',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent')) + + def test_is_academic(self): + doc = holmes_manager.get_document('GRE_Sample_Essay') + print(doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent')) + +# --- [ END ] --------------------------------------------------------------------------- \ No newline at end of file From 3922774cfa8a8be9a8391d54360f5f94d29cf5a2 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Tue, 4 Jun 2024 10:50:10 -0400 Subject: [PATCH 06/39] Fixed typo --- awe_workbench/web/parserServer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index 3de0b66..eca4071 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -609,7 +609,7 @@ async def run_parser(self, websocket, path): # in the document label = messagelist[1] doc = self.parser.get_document(label) - await websocket.send(json_dumps( + await websocket.send(json.dumps( doc._.AWE_Info(indicator='concreteness'))) elif messagelist[0] == 'ABSTRACTTRAITS': command = 'ABSTRACTTRAITS' From 1dd2b709f2ee1241a1784cf91bf55dd74a8575f7 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Thu, 6 Jun 2024 10:19:45 -0400 Subject: [PATCH 07/39] Added 
install script for conda --- installation/conda_install.sh | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 installation/conda_install.sh diff --git a/installation/conda_install.sh b/installation/conda_install.sh new file mode 100644 index 0000000..8b227d9 --- /dev/null +++ b/installation/conda_install.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +# AWE Workbench Install Script (for conda) +# Author: Caleb Scott + +# This script installs all necessary dependencies for AWE for python 3.11. +# We make the following assumptions: +# * You have a working conda environment running on python 3.11 +# * You are cd'd into the AWE_Workbench project directory. + +# Sanity Check: let the user know the preconditions. + +echo "========================= WARNING ===========================" +echo "\nYou are about to install AWE on this system." +echo "\nYou must have the following conditions met:" +echo "\n* You are currently using a python3.11 conda environment" +echo "\n* You are currently in the AWE_Workbench/installation dir" +echo "\n* All other repos have been downloaded:" +echo "\n > holmes-extractor-expandable" +echo "\n > AWE_LanguageTool" +echo "\n > AWE_SpellCorrect" +echo "\n > AWE_Lexica" +echo "\n > AWE_Components" +echo "\n=============================================================" + +read -p "\n\n Continue [Y/N]? " -n 1 -r +echo # (optional) move to a new line +if [[ $REPLY =~ ^[Yy]$ ]] +then + # Install repositories + + CODE_REPOS_LOC="../../" + echo "\n\n Installing from Source..." + + pip install -e "$CODE_REPOS_LOC/holmes-extractor-expandable/" + pip install -e "$CODE_REPOS_LOC/AWE_LanguageTool/" + pip install -e "$CODE_REPOS_LOC/AWE_SpellCorrect/" + pip install -e "$CODE_REPOS_LOC/AWE_Lexica/" + pip install -e "$CODE_REPOS_LOC/AWE_Components/" + pip install -e "$CODE_REPOS_LOC/AWE_Workbench/" + + # Install data + echo "\n\n Installing data..." 
+ + python -m awe_workbench.setup.data --develop +fi + +# Optional: prompt user input to install the proper java version? +# Source; https://askubuntu.com/questions/1279677/how-to-install-openjdk-14-jdk-on-ubuntu-16-04 From 9770521b91ffb607a5815bc640f556691b85d184 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Thu, 6 Jun 2024 10:39:22 -0400 Subject: [PATCH 08/39] Updated install script --- installation/conda_install.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/installation/conda_install.sh b/installation/conda_install.sh index 8b227d9..36da81a 100644 --- a/installation/conda_install.sh +++ b/installation/conda_install.sh @@ -5,8 +5,9 @@ # This script installs all necessary dependencies for AWE for python 3.11. # We make the following assumptions: -# * You have a working conda environment running on python 3.11 +# * You have a working conda environment running on python 3.11. # * You are cd'd into the AWE_Workbench project directory. +# * You have pip installed in the conda environment. # Sanity Check: let the user know the preconditions. From 3207924914d1b02093049406d33cd3178dbccb47 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Thu, 6 Jun 2024 10:40:41 -0400 Subject: [PATCH 09/39] Updated install script --- installation/conda_install.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/installation/conda_install.sh b/installation/conda_install.sh index 8b227d9..36da81a 100644 --- a/installation/conda_install.sh +++ b/installation/conda_install.sh @@ -5,8 +5,9 @@ # This script installs all necessary dependencies for AWE for python 3.11. # We make the following assumptions: -# * You have a working conda environment running on python 3.11 +# * You have a working conda environment running on python 3.11. # * You are cd'd into the AWE_Workbench project directory. +# * You have pip installed in the conda environment. # Sanity Check: let the user know the preconditions. 
From a820c8f4ce16e345a4d79456de5bd219d7c3469e Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 6 Jun 2024 10:50:10 -0400 Subject: [PATCH 10/39] Added protobuf patch --- installation/conda_install.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/installation/conda_install.sh b/installation/conda_install.sh index 36da81a..eb3befa 100644 --- a/installation/conda_install.sh +++ b/installation/conda_install.sh @@ -40,6 +40,10 @@ then pip install -e "$CODE_REPOS_LOC/AWE_Components/" pip install -e "$CODE_REPOS_LOC/AWE_Workbench/" + # Patch: protobuf==3.20.0 + # Data does not properly install without this patch. + pip install protobuf==3.20.0 + # Install data echo "\n\n Installing data..." From 43c6975480d8473ba6ab0c5ff2b71fa77045492f Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Mon, 1 Jul 2024 11:37:07 -0400 Subject: [PATCH 11/39] Added baseline tests for WritingObserver --- tests/test_awe_nlp.py | 273 +++++++++++++++++++++--------------------- 1 file changed, 139 insertions(+), 134 deletions(-) diff --git a/tests/test_awe_nlp.py b/tests/test_awe_nlp.py index c211478..77d9159 100644 --- a/tests/test_awe_nlp.py +++ b/tests/test_awe_nlp.py @@ -12,6 +12,7 @@ import holmes_extractor.manager as holmes import unittest +import json from awe_components.components.utility_functions import print_parse_tree from awe_workbench.pipeline import pipeline_def @@ -27,6 +28,7 @@ # --- [ SETUP ] ------------------------------------------------------------------------- # GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses +# NOTE: in the examples directory, this sample is called gre6 holmes_manager.parse_and_register_document( document_text="The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. 
A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. 
Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. 
There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", label='GRE_Sample_Essay' @@ -35,272 +37,275 @@ # --- [ CLASSES ] ----------------------------------------------------------------------- class AWENLPTest(unittest.TestCase): + def test_is_academic(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='is_academic',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='is_academic',summaryType='percent'), 22) - def test_is_academic(self): + def test_vwp_interactive_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent'), 4) - def test_is_academic(self): + def test_is_latinate(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='is_latinate',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='is_latinate',summaryType='percent'), 13) - def test_is_academic(self): + def test_vwp_evaluation_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total'), 704) - def test_is_academic(self): + def test_vwp_emotionword_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent'), 2) - def test_is_academic(self): + def test_vwp_argumentword_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent')) + 
self.assertEqual(doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent'), 100) - def test_is_academic(self): + def test_vwp_explicit_argument_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent'), 15) - def test_is_academic(self): + def test_vwp_statements_of_opinion_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent'), 78) - def test_is_academic(self): + def test_vwp_statements_of_fact_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent'), 22) - def test_is_academic(self): + def test_transitions_counts(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) + counts_dict = json.loads(doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) + self.assertEqual(sum(list(counts_dict.values())), 25) - def test_is_academic(self): + def test_transitions_positive_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_conditional_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - 
print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_consequential_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_contrastive_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total'), 5) - def test_is_academic(self): + def test_transitions_counterpoint_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_comparative_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total'), 1) - def test_is_academic(self): + def test_transitions_crossreferential_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', 
['crossreferential'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['crossreferential'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_illustrative_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total'), 6) - def test_is_academic(self): + def test_transitions_negative_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_emphatic_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total'), 2) - def test_is_academic(self): + def test_transitions_evidentiary_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_general_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total')) + 
self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_ordinal_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_purposive_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_periphrastic_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_hypothetical_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_summative_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['summative'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', 
['summative'])],summaryType='total'), 0) - def test_is_academic(self): + def test_transitions_introductory_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total'), 5) - def test_is_academic(self): + def test_pos_adj_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total'), 62) - def test_is_academic(self): + def test_pos_adv_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total'), 23) - def test_is_academic(self): + def test_pos_noun_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total'), 189) - def test_is_academic(self): + def test_pos_propn_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total'), 13) - def test_is_academic(self): + def test_pos_verb_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['VERB'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['VERB'])],summaryType='total'), 78) - def 
test_is_academic(self): + def test_pos_num_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total'), 2) - def test_is_academic(self): + def test_pos_adp_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total'), 81) - def test_is_academic(self): + def test_pos_cconj_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total'), 14) - def test_is_academic(self): + def test_pos_sconj_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total'), 17) - def test_is_academic(self): + def test_pos_aux_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total'), 36) - def test_is_academic(self): + def test_pos_pron_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total'), 22) - def test_is_academic(self): + def test_sentence_types_counts(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - 
print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) + types_dict = json.loads(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) + self.assertEqual(sum(list(types_dict.values())), 35) - def test_is_academic(self): + def test_sentence_types_simple_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total'), 13) - def test_is_academic(self): + def test_sentence_types_simple_complex_pred_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total'), 3) - def test_is_academic(self): + def test_sentence_types_simple_compound_pred_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total'), 0) - def test_is_academic(self): + def test_sentence_types_simple_compound_complex_pred_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total'), 0) - def test_is_academic(self): + def test_sentence_types_compound_total(self): doc = 
holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total'), 2) - def test_is_academic(self): + def test_sentence_types_complex_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total'), 16) - def test_is_academic(self): + def test_sentence_types_compound_complex_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total'), 1) - def test_is_academic(self): + def test_vwp_source_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_source',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_source',summaryType='percent'), 0) - def test_is_academic(self): + def test_vwp_attribution_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent'), 0) - def test_is_academic(self): + def test_vwp_cite_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_cite',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_cite',summaryType='percent'), 0) - def test_is_academic(self): + def test_vwp_quoted_percent(self): doc = 
holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent'), 0) - def test_is_academic(self): + def test_vwp_direct_speech_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent'), 0) - def test_is_academic(self): + def test_vwp_in_direct_speech_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent'), 0) - def test_is_academic(self): + def test_vwp_tone_greater_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent'), 1) - def test_is_academic(self): + def test_vwp_tone_lesser_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent'), 2) - def test_is_academic(self): + def test_concrete_details_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='concrete_details',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='concrete_details',summaryType='percent'), 2) - def test_is_academic(self): + def test_main_ideas_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total')) + 
self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total'), 9) - def test_is_academic(self): + def test_supporting_ideas_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total'), 11) - def test_is_academic(self): + def test_supporting_details_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total'), 6) - def test_is_academic(self): + def test_nSyll_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent'), 10) - def test_is_academic(self): + def test_max_freq_lesser_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent'), 9) - def test_is_academic(self): + def test_sents_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total'), 35) - def test_is_academic(self): + def test_delimiter_n_total(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total'), 223) - def test_is_academic(self): + def 
test_vwp_character_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='vwp_character',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='vwp_character',summaryType='percent'), 2) - def test_is_academic(self): + def test_in_past_tense_scope_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent'), 33) - def test_is_academic(self): + def test_vwp_propositional_attitudes_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent'), 53) - def test_is_academic(self): + def test_vwp_social_awareness_percent(self): doc = holmes_manager.get_document('GRE_Sample_Essay') - print(doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent')) + self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent'), 3) -# --- [ END ] --------------------------------------------------------------------------- \ No newline at end of file +# --- [ END ] --------------------------------------------------------------------------- From b2d74fb4f0422e3b63d936e23e6e3b5e4f862539 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Tue, 16 Jul 2024 10:50:12 -0400 Subject: [PATCH 12/39] Updated gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5c31c44..3aa1b60 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__/ *.egg-info/ *.egg +build/ From 07f8f73db2f340e38a05550fb11c09a3960c0a99 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Tue, 16 Jul 2024 12:04:10 -0400 Subject: [PATCH 13/39] Added install 
scripts for conda and venv envs --- installation/conda_fresh_install.sh | 120 +++++++++++++++++++++++++++ installation/fresh_install.sh | 121 ++++++++++++++++++++++++++++ installation/install_template.sh | 18 +++++ 3 files changed, 259 insertions(+) create mode 100644 installation/conda_fresh_install.sh create mode 100644 installation/fresh_install.sh create mode 100644 installation/install_template.sh diff --git a/installation/conda_fresh_install.sh b/installation/conda_fresh_install.sh new file mode 100644 index 0000000..faa97a9 --- /dev/null +++ b/installation/conda_fresh_install.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +# AWE Workbench Install Script (for conda) +# Author: Caleb Scott + +# This script installs all necessary dependencies for AWE +# We make the following assumptions: +# * You have a working conda environment running on python 3.12. +# * You have pip installed in the conda environment. +# (you can do this by 'conda create --name XXXX python=3.12 pip') + +# Sanity Check: let the user know the preconditions. + +NEW_DIR=${0:-"arglab-dev-py312"} +NEW_CONDA_ENV=${1:-"noconda"} +NEW_GIT=${2:-"nogit"} +BRANCH=${3:-"main"} +PROTOBUF=${4:-"noproto"} +DATA=${5:-"data"} +NEW_JAVA=${6:-"nojava"} + +echo "============================ WARNING ==============================" +echo "\nYou are about to install AWE on this system." +echo "\n* Ensure that you are using a python3.12 version." 
+echo "\n* The following repos are needed: " +echo "\n > coreferee" +echo "\n > holmes-extractor-expandable" +echo "\n > AWE_LanguageTool" +echo "\n > AWE_SpellCorrect" +echo "\n > AWE_Lexica" +echo "\n > AWE_Components" +echo "\n > AWE_Workbench" +echo "\n" +echo "\nUsage: " +echo "\n ./conda_fresh_install.sh [DIR] [CONDA] [GIT] [PROTOBUF] [DATA] [JAVA]" +echo "\n NEW_DIR: [XXXX/nodir]" +echo "\n CONDA: [conda/noconda]" +echo "\n GIT: [git/nogit]" +echo "\n BRANCH: [XXXX/main]" +echo "\n PROTOBUF: [proto/noproto]" +echo "\n DATA: [data/nodata]" +echo "\n JAVA: [java/nojava]" +echo "\n===================================================================" + +read -p "\n\n Continue [Y/N]? " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]] +then + # Set up new dir and cd to it + if [ $NEW_DIR -ne "nodir" ] + then + echo "Setting up new dev directory..." + mkdir $NEW_DIR + cd $NEW_DIR + fi + + # Set up new conda env + if [[ $NEW_CONDA_ENV =~ ^conda$ ]] + then + echo "Setting up new conda env..." + conda create --name arglab-dev-py312 python=3.12 pip + fi + + # Activate the env + echo "Activating conda env..." + conda activate arglab-dev-py312 + + # Download relevant github repos + if [[ $NEW_GIT =~ ^git$ ]] + then + echo "Pulling github repos..." + git clone -b $BRANCH git@github.com:ArgLab/coreferee.git + git clone -b $BRANCH git@github.com:ArgLab/holmes-extractor-expandable.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_LanguageTool.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_Lexica.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_Components.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_Workbench.git + fi + + # Install repos + echo "Installing repos..." + pip install -e ./coreferee + pip install -e ./holmes-extractor-expandable + pip install -e ./AWE_LanguageTool + pip install -e ./AWE_SpellCorrect + pip install -e ./AWE_Components + pip install -e ./AWE_Workbench + + # PROTOBUF fix? 
+ if [[ $PROTOBUF =~ ^proto$ ]] + then + pip install protobuf==3.20.0 + fi + + # Download data + if [[ $DATA =~ ^data$ ]] + then + echo "Downloading data..." + python -m awe_workbench.setup.data --develop + fi + + # Install java + if [[ $JAVA =~ ^java$ ]] + then + echo "Installing java sdk..." + # Source: https://askubuntu.com/questions/1279677/how-to-install-openjdk-14-jdk-on-ubuntu-16-04 + wget https://download.java.net/java/GA/jdk14.0.2/205943a0976c4ed48cb16f1043c5c647/12/GPL/openjdk-14.0.2_linux-x64_bin.tar.gz + + tar xvf openjdk-14.0.2_linux-x64_bin.tar.gz + + mv jdk-14.0.2 /usr/lib/jvm + + update-alternatives --install "/usr/bin/javac" "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" 3 + update-alternatives --install "/usr/bin/java" "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" 3 + update-alternatives --set "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" + update-alternatives --set "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" + + update-alternatives --config java + fi +fi diff --git a/installation/fresh_install.sh b/installation/fresh_install.sh new file mode 100644 index 0000000..83d2715 --- /dev/null +++ b/installation/fresh_install.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash + +# AWE Workbench Install Script (for python venv) +# Author: Caleb Scott + +# This script installs all necessary dependencies for AWE +# We make the following assumptions: +# * You are using python3.12 + +# Sanity Check: let the user know the preconditions. + +NEW_DIR=${0:-"arglab-dev-py312"} +NEW_CONDA_ENV=${1:-"novenv"} +NEW_GIT=${2:-"nogit"} +BRANCH=${3:-"main"} +PROTOBUF=${4:-"noproto"} +DATA=${5:-"data"} +NEW_JAVA=${6:-"nojava"} + +echo "============================ WARNING ==============================" +echo "\nYou are about to install AWE on this system." +echo "\n* Ensure that you are using a python3.12 version." 
+echo "\n* The following repos are needed: " +echo "\n > coreferee" +echo "\n > holmes-extractor-expandable" +echo "\n > AWE_LanguageTool" +echo "\n > AWE_SpellCorrect" +echo "\n > AWE_Lexica" +echo "\n > AWE_Components" +echo "\n > AWE_Workbench" +echo "\n" +echo "\nUsage: " +echo "\n ./conda_fresh_install.sh [DIR] [VENV] [GIT] [PROTOBUF] [DATA] [JAVA]" +echo "\n NEW_DIR: [XXXX/nodir]" +echo "\n VENV: [venv/novenv]" +echo "\n GIT: [git/nogit]" +echo "\n BRANCH: [XXXX/main]" +echo "\n PROTOBUF: [proto/noproto]" +echo "\n DATA: [data/nodata]" +echo "\n JAVA: [java/nojava]" +echo "\n===================================================================" + +read -p "\n\n Continue [Y/N]? " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]] +then + # Set up new dir and cd to it + if [ $NEW_DIR -ne "nodir" ] + then + echo "Setting up new dev directory..." + mkdir $NEW_DIR + cd $NEW_DIR + fi + + # Set up new conda env + if [[ $NEW_CONDA_ENV =~ ^venv$ ]] + then + echo "Setting up new venv env..." + python -m venv $NEW_DIR-env + fi + + # Activate the env + echo "Activating venv env..." + $NEW_DIR-env/bin/activate + + # Download relevant github repos + if [[ $NEW_GIT =~ ^git$ ]] + then + echo "Pulling github repos..." + git clone -b $BRANCH git@github.com:ArgLab/coreferee.git + git clone -b $BRANCH git@github.com:ArgLab/holmes-extractor-expandable.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_LanguageTool.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_Lexica.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_Components.git + git clone -b $BRANCH git@github.com:ArgLab/AWE_Workbench.git + fi + + # Update pip + pip install pip --upgrade + + # Install repos + echo "Installing repos..." + pip install -e ./coreferee + pip install -e ./holmes-extractor-expandable + pip install -e ./AWE_LanguageTool + pip install -e ./AWE_SpellCorrect + pip install -e ./AWE_Components + pip install -e ./AWE_Workbench + + # PROTOBUF fix? 
+ if [[ $PROTOBUF =~ ^proto$ ]] + then + pip install protobuf==3.20.0 + fi + + # Download data + if [[ $DATA =~ ^data$ ]] + then + echo "Downloading data..." + python -m awe_workbench.setup.data --develop + fi + + # Install java + if [[ $JAVA =~ ^java$ ]] + then + echo "Installing java sdk..." + # Source: https://askubuntu.com/questions/1279677/how-to-install-openjdk-14-jdk-on-ubuntu-16-04 + wget https://download.java.net/java/GA/jdk14.0.2/205943a0976c4ed48cb16f1043c5c647/12/GPL/openjdk-14.0.2_linux-x64_bin.tar.gz + + tar xvf openjdk-14.0.2_linux-x64_bin.tar.gz + + mv jdk-14.0.2 /usr/lib/jvm + + update-alternatives --install "/usr/bin/javac" "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" 3 + update-alternatives --install "/usr/bin/java" "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" 3 + update-alternatives --set "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" + update-alternatives --set "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" + + update-alternatives --config java + fi +fi diff --git a/installation/install_template.sh b/installation/install_template.sh new file mode 100644 index 0000000..d510b74 --- /dev/null +++ b/installation/install_template.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# AWE Workbench Install Script Template +# Author: Caleb Scott + +# This script shows how to use the command-line arguments for the fresh_install.sh scripts. 
+ +# CASE 1: new dir, new env, git clone, main branch, proto, data, java (conda env) +./conda_fresh_install.sh arglab-dev-py312 conda git main proto data java + +# CASE 1 (venv) +# ./fresh_install.sh arglab-dev-py312 venv git main proto data java + +# CASE 2: no dir, no env, no git, main branch, proto, data, no java (conda env) +./conda_fresh_install.sh nodir noconda nogit main proto data nojava + +# CASE 2 (venv) +# ./fresh_install.sh nodir novenv nogit main proto data nojava From c782c2d02d8e844ac96cb986ff318d3dd0cd8e6e Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Tue, 20 Aug 2024 14:22:05 -0400 Subject: [PATCH 14/39] Cleaned install scripts --- installation/AddAWEToVENV.sh | 97 ------------------- installation/conda_install.sh | 54 ----------- ...fresh_install.sh => venv_fresh_install.sh} | 0 3 files changed, 151 deletions(-) delete mode 100755 installation/AddAWEToVENV.sh delete mode 100644 installation/conda_install.sh rename installation/{fresh_install.sh => venv_fresh_install.sh} (100%) diff --git a/installation/AddAWEToVENV.sh b/installation/AddAWEToVENV.sh deleted file mode 100755 index 4b38033..0000000 --- a/installation/AddAWEToVENV.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash -# -# Add AWEToVENV -# Collin F. Lynch - -# This script takes as argument a specified VENV. It -# then adds the workbench modules to the VENV through -# the basic install process. This assumes that it is -# run in the specified install directory, that all of -# the necessary modules are present, and that the venv -# has already been constructed using the script in the -# servermanagement directory. - - -# Argument -# -------------------------------------------- -# This script takes one argument which should be the -# path to the same VENV that we are using for writing -# observer.
- -VIRTUAL_ENV="$1" -echo "USING VENV: $VIRTUAL_ENV" - - -# Parameters: -# --------------------------------------------- -PYTHON_CMD="python" -PIP_CMD="pip" - -CODE_REPOS_LOC="../../" - - - - -# Activate VENV -# --------------------------------------------------------- -source "$VIRTUAL_ENV/bin/activate" - - -# GPU Installation -# ---------------------------------------------------------- -# If we plan to use a GPU then this line must also -# be run. Comment out the code below if you do -# not want cuda installed or edit it for your -# library version. -# -# Note that by default we seem to be unable to rely -# on spacy to pull the right cuda on its own -echo -e "\n=== Installing Spacy CUDA, comment out if not needed. ===" -echo -e "\n Using CUDA v. 117" -"$PIP_CMD" install spacy[cuda117] - -# If you are using cuda 12.1 as we are on some -# systems then spacy's passthrough install will -# not work. Therefore you will need a two-step -# process. -#echo -e "\n Using CUDA v. 12.x" -#"$PIP_CMD" install cupy-cuda12x -#"$PIP_CMD" install spacy[cuda12x] - - -# Package Installation -# ---------------------------------------------------------- -echo -e "\n=== Installing Holmes Extractor ===" -"$PIP_CMD" install -e "$CODE_REPOS_LOC/holmes-extractor-expandable/" - -echo -e "\n\n=== Installing Language Tool ===" -"$PIP_CMD" install -e "$CODE_REPOS_LOC/AWE_LanguageTool" - -echo -e "\n\n=== Installing Spell Correction ===" -"$PIP_CMD" install -e "$CODE_REPOS_LOC/AWE_SpellCorrect" - -echo -e "\n\n=== Installing Lexica ===" -"$PIP_CMD" install -e "$CODE_REPOS_LOC/AWE_Lexica" - -echo -e "\n\n === Installing AWE Components ===" -"$PIP_CMD" install -e "$CODE_REPOS_LOC/AWE_Components" - -echo -e "\n\n=== Installing Workbench ===" -"$PIP_CMD" install -e "$CODE_REPOS_LOC/AWE_Workbench" - - -# Necessary Datafiles. -# ----------------------------------------------- - -echo -e "\n\n=== Installing shared data. ===" - -# Should be unneeded. 
-# Install spacy en_core_web_sm -#python -m spacy download en_core_web_sm -# NLK Data. -#python -m nltk.downloader all - -# And set up the data. -#python -m awe_workbench.setup.data --install (--develop if installing in development mode) -python -m awe_workbench.setup.data --develop - diff --git a/installation/conda_install.sh b/installation/conda_install.sh deleted file mode 100644 index eb3befa..0000000 --- a/installation/conda_install.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash - -# AWE Workbench Install Script (for conda) -# Author: Caleb Scott - -# This script installs all necessary dependencies for AWE for python 3.11. -# We make the following assumptions: -# * You have a working conda environment running on python 3.11. -# * You are cd'd into the AWE_Workbench project directory. -# * You have pip installed in the conda environment. - -# Sanity Check: let the user know the preconditions. - -echo "========================= WARNING ===========================" -echo "\nYou are about to install AWE on this system." -echo "\nYou must have the following conditions met:" -echo "\n* You are currently using a python3.11 conda environment" -echo "\n* You are currently in the AWE_Workbench/installation dir" -echo "\n* All other repos have been downloaded:" -echo "\n > holmes-extractor-expandable" -echo "\n > AWE_LanguageTool" -echo "\n > AWE_SpellCorrect" -echo "\n > AWE_Lexica" -echo "\n > AWE_Components" -echo "\n=============================================================" - -read -p "\n\n Continue [Y/N]? " -n 1 -r -echo # (optional) move to a new line -if [[ $REPLY =~ ^[Yy]$ ]] -then - # Install repositories - - CODE_REPOS_LOC="../../" - echo "\n\n Installing from Source..." 
- - pip install -e "$CODE_REPOS_LOC/holmes-extractor-expandable/" - pip install -e "$CODE_REPOS_LOC/AWE_LanguageTool/" - pip install -e "$CODE_REPOS_LOC/AWE_SpellCorrect/" - pip install -e "$CODE_REPOS_LOC/AWE_Lexica/" - pip install -e "$CODE_REPOS_LOC/AWE_Components/" - pip install -e "$CODE_REPOS_LOC/AWE_Workbench/" - - # Patch: protobuf==3.20.0 - # Data does not properly install without this patch. - pip install protobuf==3.20.0 - - # Install data - echo "\n\n Installing data..." - - python -m awe_workbench.setup.data --develop -fi - -# Optional: prompt user input to install the proper java version? -# Source; https://askubuntu.com/questions/1279677/how-to-install-openjdk-14-jdk-on-ubuntu-16-04 diff --git a/installation/fresh_install.sh b/installation/venv_fresh_install.sh similarity index 100% rename from installation/fresh_install.sh rename to installation/venv_fresh_install.sh From 6e9f1be793ba2816c79240e8aef282da11263e49 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Tue, 20 Aug 2024 14:50:49 -0400 Subject: [PATCH 15/39] Moved old tests, updated web files --- awe_workbench/web/parserServer.py | 32 +++++++-- awe_workbench/web/startServers.py | 68 ++++++++++--------- awe_workbench/web/websocketClient.py | 23 +++++-- .../{ => old_tests}/test_lexical_clusters.py | 0 .../{ => old_tests}/test_lexical_features.py | 0 .../test_prompt_specific_features.py | 0 tests/{ => old_tests}/test_server_api.py | 0 .../test_syntax_discourse_features.py | 0 .../test_viewpoint_perspective_features.py | 0 .../test_viewpoint_perspective_features2.py | 0 .../test_viewpoint_perspective_features3.py | 0 .../test_viewpoint_perspective_features4.py | 0 .../test_viewpoint_perspective_features5.py | 0 .../test_viewpoint_perspective_features6.py | 0 .../test_viewpoint_perspective_features7.py | 0 15 files changed, 80 insertions(+), 43 deletions(-) rename tests/{ => old_tests}/test_lexical_clusters.py (100%) rename tests/{ => old_tests}/test_lexical_features.py (100%) rename tests/{ => 
old_tests}/test_prompt_specific_features.py (100%) rename tests/{ => old_tests}/test_server_api.py (100%) rename tests/{ => old_tests}/test_syntax_discourse_features.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features2.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features3.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features4.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features5.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features6.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features7.py (100%) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index eca4071..ccac150 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -1,18 +1,36 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service +""" +--- [ Test: parserServer.py ] ----------------------------------------------------------- +Main server for parsing commands regarding spacy pipeline. + +@8/20/2024: modifications made to function without the use of holmes-extractor; this is +to make AWE_Workbench far easier to manage regarding dependency issues. 
+ +Author: Caleb Scott (cwscott3@ncsu.edu) + +Copyright 2022, Educational Testing Service + +----------------------------------------------------------------------------------------- +""" + +# --- [ IMPORTS ] ----------------------------------------------------------------------- + +# Basic lib imports import asyncio import base64 import websockets import json -import awe_workbench + +# AWE imports import holmes_extractor import holmes_extractor.manager import holmes_extractor.ontology -from holmes_extractor.manager import Manager -from holmes_extractor.ontology import Ontology from awe_components.components.utility_functions import content_pos +# --- [ CONSTS/VARS ] ------------------------------------------------------------------- + +# --- [ CLASSES ] ----------------------------------------------------------------------- + class parserServer: # Initialize @@ -1322,6 +1340,10 @@ async def run_parser(self, websocket, path): else: await websocket.send(False) +# --- [ MAIN ] -------------------------------------------------------------------------- + if __name__ == '__main__': print('parser server loading') wsc = parserServer() + +# --- [ END ] --------------------------------------------------------------------------- diff --git a/awe_workbench/web/startServers.py b/awe_workbench/web/startServers.py index a512d62..67a0af1 100644 --- a/awe_workbench/web/startServers.py +++ b/awe_workbench/web/startServers.py @@ -1,10 +1,18 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service +""" +--- [ Test: startServers.py ] ----------------------------------------------------------- -from multiprocessing import Process, Queue +Code for kicking off the parserServer. 
-import os -import time +Author: Caleb Scott (cwscott3@ncsu.edu) + +Copyright 2022, Educational Testing Service + +----------------------------------------------------------------------------------------- +""" + +# --- [ IMPORTS ] ----------------------------------------------------------------------- + +from multiprocessing import Process import awe_languagetool.languagetoolServer import awe_spellcorrect.spellcorrectServer @@ -12,39 +20,35 @@ import argparse from awe_workbench.pipeline import pipeline_def +# --- [ CLASSES ] ----------------------------------------------------------------------- -class startServers: - - # Initialize - p1 = None - p2 = None - p3 = None - queue = None +def startServers(): - def __init__(self): - queue = Queue() + p1 = Process( + target=awe_languagetool.languagetoolServer.runServer, + args=() + ) + p1.start() - p1 = \ - Process(target=awe_languagetool.languagetoolServer.runServer, - args=()) - p1.start() + p2 = Process( + target=awe_spellcorrect.spellcorrectServer.spellcorrectServer, + args=() + ) + p2.start() - p2 = \ - Process(target=awe_spellcorrect.spellcorrectServer.spellcorrectServer, - args=()) - p2.start() - - p3 = Process(target=awe_workbench.web.parserServer.parserServer, - args=(), - kwargs={'pipeline_def': pipeline_def}) - p3.start() + p3 = Process( + target=awe_workbench.web.parserServer.parserServer,args=(), + kwargs={ + 'pipeline_def': pipeline_def + } + ) + p3.start() +# --- [ MAIN ] -------------------------------------------------------------------------- if __name__ == '__main__': - - parser = \ - argparse.ArgumentParser(description='Run AWE Workbench server scripts') - + parser = argparse.ArgumentParser(description='Run AWE Workbench server scripts') args = parser.parse_args() - startServers() + +# --- [ END ] --------------------------------------------------------------------------- diff --git a/awe_workbench/web/websocketClient.py b/awe_workbench/web/websocketClient.py index 47212e2..885c254 100644 --- 
a/awe_workbench/web/websocketClient.py +++ b/awe_workbench/web/websocketClient.py @@ -1,16 +1,24 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service +""" +--- [ Test: startServers.py ] ----------------------------------------------------------- + +Code for kicking off the parserServer. + +Author: Caleb Scott (cwscott3@ncsu.edu) + +Copyright 2022, Educational Testing Service + +----------------------------------------------------------------------------------------- +""" + +# --- [ IMPORTS ] ----------------------------------------------------------------------- -import asyncio -import websocket import json from websocket import create_connection +# --- [ CLASSES ] ----------------------------------------------------------------------- class websocketClient: - uri = None - def __init__(self): self.uri = "ws://localhost:8765" @@ -44,8 +52,11 @@ def sendraw(self, texts: list): print(e) return None +# --- [ MAIN ] -------------------------------------------------------------------------- if __name__ == '__main__': wsc = websocketClient() test = wsc.check(['The grrls are happpy.']) print(test) + +# --- [ END ] --------------------------------------------------------------------------- diff --git a/tests/test_lexical_clusters.py b/tests/old_tests/test_lexical_clusters.py similarity index 100% rename from tests/test_lexical_clusters.py rename to tests/old_tests/test_lexical_clusters.py diff --git a/tests/test_lexical_features.py b/tests/old_tests/test_lexical_features.py similarity index 100% rename from tests/test_lexical_features.py rename to tests/old_tests/test_lexical_features.py diff --git a/tests/test_prompt_specific_features.py b/tests/old_tests/test_prompt_specific_features.py similarity index 100% rename from tests/test_prompt_specific_features.py rename to tests/old_tests/test_prompt_specific_features.py diff --git a/tests/test_server_api.py b/tests/old_tests/test_server_api.py similarity index 100% rename from tests/test_server_api.py 
rename to tests/old_tests/test_server_api.py diff --git a/tests/test_syntax_discourse_features.py b/tests/old_tests/test_syntax_discourse_features.py similarity index 100% rename from tests/test_syntax_discourse_features.py rename to tests/old_tests/test_syntax_discourse_features.py diff --git a/tests/test_viewpoint_perspective_features.py b/tests/old_tests/test_viewpoint_perspective_features.py similarity index 100% rename from tests/test_viewpoint_perspective_features.py rename to tests/old_tests/test_viewpoint_perspective_features.py diff --git a/tests/test_viewpoint_perspective_features2.py b/tests/old_tests/test_viewpoint_perspective_features2.py similarity index 100% rename from tests/test_viewpoint_perspective_features2.py rename to tests/old_tests/test_viewpoint_perspective_features2.py diff --git a/tests/test_viewpoint_perspective_features3.py b/tests/old_tests/test_viewpoint_perspective_features3.py similarity index 100% rename from tests/test_viewpoint_perspective_features3.py rename to tests/old_tests/test_viewpoint_perspective_features3.py diff --git a/tests/test_viewpoint_perspective_features4.py b/tests/old_tests/test_viewpoint_perspective_features4.py similarity index 100% rename from tests/test_viewpoint_perspective_features4.py rename to tests/old_tests/test_viewpoint_perspective_features4.py diff --git a/tests/test_viewpoint_perspective_features5.py b/tests/old_tests/test_viewpoint_perspective_features5.py similarity index 100% rename from tests/test_viewpoint_perspective_features5.py rename to tests/old_tests/test_viewpoint_perspective_features5.py diff --git a/tests/test_viewpoint_perspective_features6.py b/tests/old_tests/test_viewpoint_perspective_features6.py similarity index 100% rename from tests/test_viewpoint_perspective_features6.py rename to tests/old_tests/test_viewpoint_perspective_features6.py diff --git a/tests/test_viewpoint_perspective_features7.py b/tests/old_tests/test_viewpoint_perspective_features7.py similarity index 100% 
rename from tests/test_viewpoint_perspective_features7.py rename to tests/old_tests/test_viewpoint_perspective_features7.py From 6cdf48b60768b8dc3ad02c1fa8c98cdfeb79050d Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 22 Aug 2024 14:59:33 -0400 Subject: [PATCH 16/39] Updated main test to work without holmes --- awe_workbench/web/parserServer.py | 3 - tests/test_awe_nlp.py | 240 +++++++++++------------------- 2 files changed, 91 insertions(+), 152 deletions(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index ccac150..cd18269 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -33,9 +33,6 @@ class parserServer: - # Initialize - parser = None - def __init__(self, pipeline_def=[]): # set up and initializing Holmes diff --git a/tests/test_awe_nlp.py b/tests/test_awe_nlp.py index 77d9159..6120f36 100644 --- a/tests/test_awe_nlp.py +++ b/tests/test_awe_nlp.py @@ -10,302 +10,244 @@ # --- [ IMPORTS ] ----------------------------------------------------------------------- -import holmes_extractor.manager as holmes import unittest import json -from awe_components.components.utility_functions import print_parse_tree +import spacy + from awe_workbench.pipeline import pipeline_def # --- [ CONSTS/VARS ] ------------------------------------------------------------------- -holmes_manager = holmes.Manager( - 'en_core_web_lg', - perform_coreference_resolution=False, - number_of_workers=2, - extra_components=pipeline_def -) +SPACY_MODEL = 'en_core_web_lg' -# --- [ SETUP ] ------------------------------------------------------------------------- +COMPONENTS = [el['component'] for el in pipeline_def] -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -# NOTE: in the examples directory, this sample is called gre6 -holmes_manager.parse_and_register_document( - document_text="The statement linking technology negatively with free thinking plays on 
recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. 
Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. 
There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", - label='GRE_Sample_Essay' -) +TEST_TEXT_LOC = "awe_workbench/examples/essays/gre6.txt" # --- [ CLASSES ] ----------------------------------------------------------------------- class AWENLPTest(unittest.TestCase): + def setUp(self): + """ + This is the basic initializer for all test classes. + + Sets up the spacy pipeline. + """ + # Initialize the pipeline + try: + self.nlp = spacy.load(SPACY_MODEL) + for comp in COMPONENTS: + self.nlp.add_pipe(comp) + except OSError as e: + print("There was an error loading 'en_core_web_lg' from spacy.") + raise OSError() from e + + # Now get the text + with open(TEST_TEXT_LOC, 'r') as in_file: + self.doc = self.nlp(in_file.read()) + def test_is_academic(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='is_academic',summaryType='percent'), 22) + self.assertEqual(self.doc._.AWE_Info(indicator='is_academic',summaryType='percent'), 22) def test_vwp_interactive_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent'), 4) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent'), 4) def test_is_latinate(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='is_latinate',summaryType='percent'), 13) + self.assertEqual(self.doc._.AWE_Info(indicator='is_latinate',summaryType='percent'), 13) def test_vwp_evaluation_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total'), 704) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total'), 704) def test_vwp_emotionword_percent(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent'), 2) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent'), 2) def test_vwp_argumentword_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent'), 100) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent'), 100) def test_vwp_explicit_argument_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent'), 15) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent'), 15) def test_vwp_statements_of_opinion_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent'), 78) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent'), 78) def test_vwp_statements_of_fact_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent'), 22) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent'), 22) def test_transitions_counts(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - counts_dict = json.loads(doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) + counts_dict = json.loads(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) self.assertEqual(sum(list(counts_dict.values())), 25) def test_transitions_positive_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total'), 0) def test_transitions_conditional_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total'), 0) def test_transitions_consequential_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total'), 0) def test_transitions_contrastive_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total'), 5) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total'), 5) def test_transitions_counterpoint_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total'), 0) def test_transitions_comparative_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total'), 1) + 
self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total'), 1) def test_transitions_crossreferential_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['crossreferential'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['crossreferential'])],summaryType='total'), 0) def test_transitions_illustrative_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total'), 6) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total'), 6) def test_transitions_negative_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total'), 0) def test_transitions_emphatic_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total'), 2) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total'), 2) def test_transitions_evidentiary_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total'), 0) def 
test_transitions_general_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total'), 0) def test_transitions_ordinal_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total'), 0) def test_transitions_purposive_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total'), 0) def test_transitions_periphrastic_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total'), 0) def test_transitions_hypothetical_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total'), 0) def test_transitions_summative_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['summative'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['summative'])],summaryType='total'), 0) def test_transitions_introductory_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total'), 5) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total'), 5) def test_pos_adj_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total'), 62) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total'), 62) def test_pos_adv_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total'), 23) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total'), 23) def test_pos_noun_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total'), 189) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total'), 189) def test_pos_propn_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total'), 13) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total'), 13) def test_pos_verb_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', 
['VERB'])],summaryType='total'), 78) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['VERB'])],summaryType='total'), 78) def test_pos_num_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total'), 2) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total'), 2) def test_pos_adp_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total'), 81) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total'), 81) def test_pos_cconj_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total'), 14) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total'), 14) def test_pos_sconj_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total'), 17) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total'), 17) def test_pos_aux_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total'), 36) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total'), 36) def test_pos_pron_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total'), 22) + self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total'), 22) def test_sentence_types_counts(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - types_dict = json.loads(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) + types_dict = json.loads(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) self.assertEqual(sum(list(types_dict.values())), 35) def test_sentence_types_simple_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total'), 13) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total'), 13) def test_sentence_types_simple_complex_pred_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total'), 3) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total'), 3) def test_sentence_types_simple_compound_pred_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total'), 0) def test_sentence_types_simple_compound_complex_pred_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total'), 0) def test_sentence_types_compound_total(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total'), 2) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total'), 2) def test_sentence_types_complex_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total'), 16) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total'), 16) def test_sentence_types_compound_complex_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total'), 1) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total'), 1) def test_vwp_source_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_source',summaryType='percent'), 0) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_source',summaryType='percent'), 0) def test_vwp_attribution_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent'), 0) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent'), 0) def test_vwp_cite_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_cite',summaryType='percent'), 0) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_cite',summaryType='percent'), 0) def test_vwp_quoted_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent'), 0) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent'), 0) def test_vwp_direct_speech_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent'), 0) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent'), 0) def test_vwp_in_direct_speech_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent'), 0) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent'), 0) def test_vwp_tone_greater_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent'), 1) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent'), 1) def test_vwp_tone_lesser_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent'), 2) + self.assertEqual(self.doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent'), 2) def test_concrete_details_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='concrete_details',summaryType='percent'), 2) + self.assertEqual(self.doc._.AWE_Info(indicator='concrete_details',summaryType='percent'), 2) def test_main_ideas_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total'), 9) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total'), 9) def 
test_supporting_ideas_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total'), 11) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total'), 11) def test_supporting_details_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total'), 6) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total'), 6) def test_nSyll_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent'), 10) + self.assertEqual(self.doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent'), 10) def test_max_freq_lesser_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent'), 9) + self.assertEqual(self.doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent'), 9) def test_sents_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total'), 35) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total'), 35) def test_delimiter_n_total(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total'), 223) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total'), 223) def test_vwp_character_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='vwp_character',summaryType='percent'), 2) + 
self.assertEqual(self.doc._.AWE_Info(indicator='vwp_character',summaryType='percent'), 2) def test_in_past_tense_scope_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent'), 33) + self.assertEqual(self.doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent'), 33) def test_vwp_propositional_attitudes_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent'), 53) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent'), 53) def test_vwp_social_awareness_percent(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent'), 3) + self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent'), 3) # --- [ END ] --------------------------------------------------------------------------- From c4165be94079db8a49868312e9db1170b3e07925 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 22 Aug 2024 15:43:16 -0400 Subject: [PATCH 17/39] Updated main test, added essay 'chooser' --- tests/test_awe_nlp.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_awe_nlp.py b/tests/test_awe_nlp.py index 6120f36..db5df77 100644 --- a/tests/test_awe_nlp.py +++ b/tests/test_awe_nlp.py @@ -13,8 +13,17 @@ import unittest import json import spacy +import coreferee +import spacytextblob.spacytextblob + +import awe_components.components.lexicalFeatures +import awe_components.components.syntaxDiscourseFeats +import awe_components.components.viewpointFeatures +import awe_components.components.lexicalClusters +import awe_components.components.contentSegmentation from awe_workbench.pipeline import 
pipeline_def +from examples.essays.essays import get_essay # --- [ CONSTS/VARS ] ------------------------------------------------------------------- @@ -22,7 +31,7 @@ COMPONENTS = [el['component'] for el in pipeline_def] -TEST_TEXT_LOC = "awe_workbench/examples/essays/gre6.txt" +TEST_TEXT = "gre6.txt" # --- [ CLASSES ] ----------------------------------------------------------------------- @@ -37,6 +46,7 @@ def setUp(self): # Initialize the pipeline try: self.nlp = spacy.load(SPACY_MODEL) + self.nlp.add_pipe('coreferee') for comp in COMPONENTS: self.nlp.add_pipe(comp) except OSError as e: @@ -44,8 +54,7 @@ def setUp(self): raise OSError() from e # Now get the text - with open(TEST_TEXT_LOC, 'r') as in_file: - self.doc = self.nlp(in_file.read()) + self.doc = self.nlp(get_essay(TEST_TEXT)) def test_is_academic(self): self.assertEqual(self.doc._.AWE_Info(indicator='is_academic',summaryType='percent'), 22) From e46b58eeed17f5f4d1126d1a9a785bf8a256b760 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 22 Aug 2024 15:43:36 -0400 Subject: [PATCH 18/39] Updated essay 'chooser' --- examples/__init__.py | 0 examples/essays/__init__.py | 0 examples/essays/essays.py | 21 +++++++++++++++++++++ 3 files changed, 21 insertions(+) create mode 100644 examples/__init__.py create mode 100644 examples/essays/__init__.py create mode 100644 examples/essays/essays.py diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/essays/__init__.py b/examples/essays/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/essays/essays.py b/examples/essays/essays.py new file mode 100644 index 0000000..6ce2498 --- /dev/null +++ b/examples/essays/essays.py @@ -0,0 +1,21 @@ +""" +--- [ Test: essays.py ] ----------------------------------------------------------------- + +Code for retrieving an essay from the set of all essays, using the filename. 
+ +Author: Caleb Scott (cwscott3@ncsu.edu) + +----------------------------------------------------------------------------------------- +""" + +import os +import re + +def get_essay(essay_name: str) -> str: + """ + Given :essay_name:, return the string of the entire essay. + """ + target = os.path.join(os.path.dirname(__file__), essay_name) + with open(target, 'r') as in_file: + raw = in_file.read().replace('\n', ' ') + return re.sub(' +', ' ', raw) \ No newline at end of file From 4c643a80c14da59479bcf2589b3ccb594bc3d9e1 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 5 Sep 2024 17:44:23 -0400 Subject: [PATCH 19/39] Updates to parserServer --- awe_workbench/web/parserServer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index cd18269..b9ab671 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -29,6 +29,12 @@ # --- [ CONSTS/VARS ] ------------------------------------------------------------------- +HOST = 'localhost' +PORT = 8766 +MAX_DATA_LIMIT = 2 ** 24 + +SPACY_MODEL = 'en_core_web_lg' + # --- [ CLASSES ] ----------------------------------------------------------------------- class parserServer: @@ -40,12 +46,12 @@ def __init__(self, pipeline_def=[]): # You can try setting overall_similarity_threshold # to 0.85 and/or perform_coreference_resolution to False self.parser = holmes_extractor.manager.Manager( - model='en_core_web_lg', + model=SPACY_MODEL, perform_coreference_resolution=True, extra_components=pipeline_def) asyncio.get_event_loop().run_until_complete( - websockets.serve(self.run_parser, 'localhost', 8766, max_size=2 ** 24)) + websockets.serve(self.run_parser, HOST, PORT, max_size=MAX_DATA_LIMIT)) print('parser running') asyncio.get_event_loop().run_forever() print('died') From 468988bff1614e07a5370c2baa1675275ab23588 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Mon, 9 Sep 2024 17:11:40 -0400 Subject: 
[PATCH 20/39] Updated parserServer stubs & pipeline --- awe_workbench/pipeline.py | 68 +++++++---- awe_workbench/web/parserServer.py | 192 +++++++++++++++++++++++++++--- 2 files changed, 221 insertions(+), 39 deletions(-) diff --git a/awe_workbench/pipeline.py b/awe_workbench/pipeline.py index 0120901..4989d72 100644 --- a/awe_workbench/pipeline.py +++ b/awe_workbench/pipeline.py @@ -40,27 +40,47 @@ # module that rougly identifies main ideas/supporting ideas/details in # argument-style texts. -pipeline_def = [{'package': 'spacytextblob', - 'module': 'spacytextblob', - 'component': 'spacytextblob', - 'language': ['en']}, - {'package': 'awe_components.components', - 'module': 'lexicalFeatures', - 'component': 'lexicalfeatures', - 'language': ['en']}, - {'package': 'awe_components.components', - 'module': 'syntaxDiscourseFeats', - 'component': 'syntaxdiscoursefeatures', - 'language': ['en']}, - {'package': 'awe_components.components', - 'module': 'viewpointFeatures', - 'component': 'viewpointfeatures', - 'language': ['en']}, - {'package': 'awe_components.components', - 'module': 'lexicalClusters', - 'component': 'lexicalclusters', - 'language': ['en']}, - {'package': 'awe_components.components', - 'module': 'contentSegmentation', - 'component': 'contentsegmentation', - 'language': ['en']}] +pipeline_def = [ + { + 'package': 'spacytextblob', + 'module': 'spacytextblob', + 'component': 'spacytextblob', + 'language': ['en'] + }, + { + 'package': 'coreferee', + 'module': 'coreferee', + 'component': 'coreferee', + 'language': ['en'] + }, + { + 'package': 'awe_components.components', + 'module': 'lexicalFeatures', + 'component': 'lexicalfeatures', + 'language': ['en'] + }, + { + 'package': 'awe_components.components', + 'module': 'syntaxDiscourseFeats', + 'component': 'syntaxdiscoursefeatures', + 'language': ['en'] + }, + { + 'package': 'awe_components.components', + 'module': 'viewpointFeatures', + 'component': 'viewpointfeatures', + 'language': ['en'] + }, + { + 
'package': 'awe_components.components', + 'module': 'lexicalClusters', + 'component': 'lexicalclusters', + 'language': ['en'] + }, + { + 'package': 'awe_components.components', + 'module': 'contentSegmentation', + 'component': 'contentsegmentation', + 'language': ['en'] + } +] diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index b9ab671..29ef26d 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -20,47 +20,209 @@ import base64 import websockets import json +import spacy +import coreferee +import spacytextblob.spacytextblob # AWE imports -import holmes_extractor -import holmes_extractor.manager -import holmes_extractor.ontology from awe_components.components.utility_functions import content_pos +import awe_components.components.lexicalFeatures +import awe_components.components.syntaxDiscourseFeats +import awe_components.components.viewpointFeatures +import awe_components.components.lexicalClusters +import awe_components.components.contentSegmentation +from awe_workbench.pipeline import pipeline_def # --- [ CONSTS/VARS ] ------------------------------------------------------------------- HOST = 'localhost' PORT = 8766 MAX_DATA_LIMIT = 2 ** 24 - SPACY_MODEL = 'en_core_web_lg' +COMPONENTS = [el['component'] for el in pipeline_def] +AWE_INFO_KEYS = ['indicator', 'infoType', 'summaryType', 'filters', 'transformations'] # --- [ CLASSES ] ----------------------------------------------------------------------- class parserServer: - def __init__(self, pipeline_def=[]): + def __init__(self): - # set up and initializing Holmes - # Start the Holmes manager with the English model - # You can try setting overall_similarity_threshold - # to 0.85 and/or perform_coreference_resolution to False - self.parser = holmes_extractor.manager.Manager( - model=SPACY_MODEL, - perform_coreference_resolution=True, - extra_components=pipeline_def) + # Set up the NLP pipeline + print("initializing NLP pipeline...") + try: + 
self.nlp = spacy.load(SPACY_MODEL) + for comp in COMPONENTS: + self.nlp.add_pipe(comp) + except OSError as e: + print("There was an error loading 'en_core_web_lg' from spacy.") + raise OSError() from e + + # Instead of using holmes, we save the docs in memory + self.docs = {} + self.partial = "" + # Start the event loop, and run until the kill command + print("starting event loop -- use [KILL] command to terminate.") asyncio.get_event_loop().run_until_complete( websockets.serve(self.run_parser, HOST, PORT, max_size=MAX_DATA_LIMIT)) - print('parser running') + print('parser server running...') asyncio.get_event_loop().run_forever() - print('died') + print('parser server terminated...') async def kill(self, websocket): + """ + Command called to kill the parser server. + """ self.parser.close() + await websocket.send(json.dumps(True)) await websocket.close() exit() + def clear_parsed(self): + """ + Resets the document store to an empty mapping. + """ + self.docs = {} + return True + + def remove(self, label): + """ + Removes a document from the document store. + """ + del self.docs[label] + return True + + def parse_one(self, label, text): + """ + Parses a single document, and adds it to the document store. + + NOTE: we overwrite documents with the same key. + """ + self.docs[label] = self.partial + self.nlp(text) + self.partial = "" + print(f"parsed document: {label}") + return True + + def partial_text(self, text): + """ + Adds partial text to be processed in the future. + """ + self.partial += text + return True + + def parse_set(self, doc_set): + """ + Parses a document list of tuples (labels, text). + """ + for label, text in doc_set: + self.parse_one(label, text) + return True + + def labels(self): + """ + Returns a list of all document labels. 
+ """ + return list(self.docs.keys()) + + def serialized(self, label): + """ + Returns a serialized document, selected by label + """ + return base64.b64encode(self.docs[label]) + + def new_search_phrase(self): + pass + + def remove_labeled_search(self): + pass + + def clear_searches(self): + pass + + def show_search_labels(self): + pass + + def match_documents(self): + pass + + def frequencies(self): + pass + + def topic_matches(self): + pass + + def awe_info(self, label, *args): + """ + Returns information specified in an AWE_Info object. + + This information is determined by: + * indic - indicator name + * itype - information type + * summ - summary type + * filt - filters + * trans - transformations + """ + doc = self.docs[label] + kwargs = {} + + # Get the appropriate arguments for AWE_Info + # Since we have a list of values, we need to map them first + for i, val in enumerate(args): + kwargs[AWE_INFO_KEYS[i]] = val + if not kwargs: + return None + else: + return doc._.AWE_Info(**kwargs) + + def fast_map_awe_info(self, command): + """ + Maps to awe_info(), given a simple command + """ + pass + + def doc_heads(self, label): + """ + Returns list of token heads for a given document. + """ + doc = self.docs[label] + return [token.head.i for token in doc] + + def pos(self, label): + """ + Returns positions of tokens for a given document. + """ + doc = self.docs[label] + return [token.pos_ for token in doc] + + def doc_dependencies(self, label): + """ + Returns dependencies of tokens for a given document. + """ + doc = self.docs[label] + return [token.dep_ for token in doc] + + def doc_entities(self, label): + """ + Returns all entities for a given document. + """ + doc = self.docs[label] + return [ + [ + ent.text, + ent.start_char, + ent.end_char, + ent.label_ + ] for ent in doc.ents + ] + + def tok_vecs(self, label): + """ + Returns token vectors for a given document. 
+ """ + doc = self.docs[label] + return doc._.token_vectors + summaryLabels = [ 'mean_nSyll', 'med_nSyll', From d07625a7ad9e245ec12a295544cd0415374bba26 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Mon, 9 Sep 2024 17:19:42 -0400 Subject: [PATCH 21/39] Removed coreferee from test; covered in pipeline.py --- tests/test_awe_nlp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_awe_nlp.py b/tests/test_awe_nlp.py index db5df77..bf64e86 100644 --- a/tests/test_awe_nlp.py +++ b/tests/test_awe_nlp.py @@ -46,7 +46,6 @@ def setUp(self): # Initialize the pipeline try: self.nlp = spacy.load(SPACY_MODEL) - self.nlp.add_pipe('coreferee') for comp in COMPONENTS: self.nlp.add_pipe(comp) except OSError as e: From c887a479e4523d7d1b04320fc4c991d5797f5879 Mon Sep 17 00:00:00 2001 From: cwscott3 Date: Thu, 12 Sep 2024 13:12:04 -0400 Subject: [PATCH 22/39] No-holmes baseline tests --- awe_workbench/pipeline.py | 4 + examples/essays/essays.py | 21 ++ tests/{ => old_tests}/__init__.py | 0 tests/{ => old_tests}/test_awe_nlp.py | 0 .../{ => old_tests}/test_lexical_clusters.py | 0 .../{ => old_tests}/test_lexical_features.py | 0 .../test_prompt_specific_features.py | 0 tests/{ => old_tests}/test_server_api.py | 0 .../test_syntax_discourse_features.py | 0 .../test_viewpoint_perspective_features.py | 0 .../test_viewpoint_perspective_features2.py | 0 .../test_viewpoint_perspective_features3.py | 0 .../test_viewpoint_perspective_features4.py | 0 .../test_viewpoint_perspective_features5.py | 0 .../test_viewpoint_perspective_features6.py | 0 .../test_viewpoint_perspective_features7.py | 0 tests/test_awe_nlp_no_holmes.py | 262 ++++++++++++++++++ 17 files changed, 287 insertions(+) create mode 100644 examples/essays/essays.py rename tests/{ => old_tests}/__init__.py (100%) rename tests/{ => old_tests}/test_awe_nlp.py (100%) rename tests/{ => old_tests}/test_lexical_clusters.py (100%) rename tests/{ => old_tests}/test_lexical_features.py (100%) rename tests/{ => 
old_tests}/test_prompt_specific_features.py (100%) rename tests/{ => old_tests}/test_server_api.py (100%) rename tests/{ => old_tests}/test_syntax_discourse_features.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features2.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features3.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features4.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features5.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features6.py (100%) rename tests/{ => old_tests}/test_viewpoint_perspective_features7.py (100%) create mode 100644 tests/test_awe_nlp_no_holmes.py diff --git a/awe_workbench/pipeline.py b/awe_workbench/pipeline.py index 0120901..dc40ded 100644 --- a/awe_workbench/pipeline.py +++ b/awe_workbench/pipeline.py @@ -44,6 +44,10 @@ 'module': 'spacytextblob', 'component': 'spacytextblob', 'language': ['en']}, + {'package': 'coreferee', + 'module': 'coreferee', + 'component': 'coreferee', + 'language': ['en']}, {'package': 'awe_components.components', 'module': 'lexicalFeatures', 'component': 'lexicalfeatures', diff --git a/examples/essays/essays.py b/examples/essays/essays.py new file mode 100644 index 0000000..f6f9ae7 --- /dev/null +++ b/examples/essays/essays.py @@ -0,0 +1,21 @@ +""" +--- [ Test: essays.py ] ----------------------------------------------------------------- + +Code for retrieving an essay from the set of all essays, using the filename. + +Author: Caleb Scott (cwscott3@ncsu.edu) + +----------------------------------------------------------------------------------------- +""" + +import os +import re + +def get_essay(essay_name: str) -> str: + """ + Given :essay_name:, return the string of the entire essay. 
+ """ + target = os.path.join(os.path.dirname(__file__), essay_name) + with open(target, 'r') as in_file: + raw = in_file.read().replace('\n', ' ') + return re.sub(' +', ' ', raw) diff --git a/tests/__init__.py b/tests/old_tests/__init__.py similarity index 100% rename from tests/__init__.py rename to tests/old_tests/__init__.py diff --git a/tests/test_awe_nlp.py b/tests/old_tests/test_awe_nlp.py similarity index 100% rename from tests/test_awe_nlp.py rename to tests/old_tests/test_awe_nlp.py diff --git a/tests/test_lexical_clusters.py b/tests/old_tests/test_lexical_clusters.py similarity index 100% rename from tests/test_lexical_clusters.py rename to tests/old_tests/test_lexical_clusters.py diff --git a/tests/test_lexical_features.py b/tests/old_tests/test_lexical_features.py similarity index 100% rename from tests/test_lexical_features.py rename to tests/old_tests/test_lexical_features.py diff --git a/tests/test_prompt_specific_features.py b/tests/old_tests/test_prompt_specific_features.py similarity index 100% rename from tests/test_prompt_specific_features.py rename to tests/old_tests/test_prompt_specific_features.py diff --git a/tests/test_server_api.py b/tests/old_tests/test_server_api.py similarity index 100% rename from tests/test_server_api.py rename to tests/old_tests/test_server_api.py diff --git a/tests/test_syntax_discourse_features.py b/tests/old_tests/test_syntax_discourse_features.py similarity index 100% rename from tests/test_syntax_discourse_features.py rename to tests/old_tests/test_syntax_discourse_features.py diff --git a/tests/test_viewpoint_perspective_features.py b/tests/old_tests/test_viewpoint_perspective_features.py similarity index 100% rename from tests/test_viewpoint_perspective_features.py rename to tests/old_tests/test_viewpoint_perspective_features.py diff --git a/tests/test_viewpoint_perspective_features2.py b/tests/old_tests/test_viewpoint_perspective_features2.py similarity index 100% rename from 
tests/test_viewpoint_perspective_features2.py rename to tests/old_tests/test_viewpoint_perspective_features2.py diff --git a/tests/test_viewpoint_perspective_features3.py b/tests/old_tests/test_viewpoint_perspective_features3.py similarity index 100% rename from tests/test_viewpoint_perspective_features3.py rename to tests/old_tests/test_viewpoint_perspective_features3.py diff --git a/tests/test_viewpoint_perspective_features4.py b/tests/old_tests/test_viewpoint_perspective_features4.py similarity index 100% rename from tests/test_viewpoint_perspective_features4.py rename to tests/old_tests/test_viewpoint_perspective_features4.py diff --git a/tests/test_viewpoint_perspective_features5.py b/tests/old_tests/test_viewpoint_perspective_features5.py similarity index 100% rename from tests/test_viewpoint_perspective_features5.py rename to tests/old_tests/test_viewpoint_perspective_features5.py diff --git a/tests/test_viewpoint_perspective_features6.py b/tests/old_tests/test_viewpoint_perspective_features6.py similarity index 100% rename from tests/test_viewpoint_perspective_features6.py rename to tests/old_tests/test_viewpoint_perspective_features6.py diff --git a/tests/test_viewpoint_perspective_features7.py b/tests/old_tests/test_viewpoint_perspective_features7.py similarity index 100% rename from tests/test_viewpoint_perspective_features7.py rename to tests/old_tests/test_viewpoint_perspective_features7.py diff --git a/tests/test_awe_nlp_no_holmes.py b/tests/test_awe_nlp_no_holmes.py new file mode 100644 index 0000000..8810dbc --- /dev/null +++ b/tests/test_awe_nlp_no_holmes.py @@ -0,0 +1,262 @@ +""" +--- [ Test: test_awe_nlp.py ] ----------------------------------------------------------- + +Set of corresponding tests for document features found in awe_nlp.py of writingobserver. 
+ +Author: Caleb Scott (cwscott3@ncsu.edu) + +----------------------------------------------------------------------------------------- +""" + +# --- [ IMPORTS ] ----------------------------------------------------------------------- + +import unittest +import json +import spacy +import coreferee +import spacytextblob.spacytextblob + +import awe_components.components.lexicalFeatures +import awe_components.components.syntaxDiscourseFeats +import awe_components.components.viewpointFeatures +import awe_components.components.lexicalClusters +import awe_components.components.contentSegmentation + +from awe_workbench.pipeline import pipeline_def +from examples.essays.essays import get_essay + +# --- [ CONSTS/VARS ] ------------------------------------------------------------------- + +SPACY_MODEL = 'en_core_web_lg' + +COMPONENTS = [el['component'] for el in pipeline_def] + +TEST_TEXT = "gre6.txt" + +# --- [ CLASSES ] ----------------------------------------------------------------------- + +class AWENLPTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + """ + This is the basic initializer for all test classes. + + Sets up the spacy pipeline. 
+ """ + # Initialize the pipeline + try: + cls.nlp = spacy.load(SPACY_MODEL) + for comp in COMPONENTS: + cls.nlp.add_pipe(comp) + except OSError as e: + print("There was an error loading 'en_core_web_lg' from spacy.") + raise OSError() from e + + # Now get the text + cls.doc = cls.nlp(get_essay(TEST_TEXT)) + + def test_is_academic(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='is_academic',summaryType='percent'), 22) + + def test_vwp_interactive_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent'), 4) + + def test_is_latinate(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='is_latinate',summaryType='percent'), 13) + + def test_vwp_evaluation_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total'), 704) + + def test_vwp_emotionword_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent'), 2) + + def test_vwp_argumentword_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent'), 100) + + def test_vwp_explicit_argument_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent'), 15) + + def test_vwp_statements_of_opinion_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent'), 78) + + def test_vwp_statements_of_fact_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent'), 22) + + def test_transitions_counts(self): + counts_dict = json.loads(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) + self.assertEqual(sum(list(counts_dict.values())), 25) + + def test_transitions_positive_total(self): + 
self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total'), 0) + + def test_transitions_conditional_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total'), 0) + + def test_transitions_consequential_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total'), 0) + + def test_transitions_contrastive_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total'), 5) + + def test_transitions_counterpoint_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total'), 0) + + def test_transitions_comparative_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total'), 1) + + def test_transitions_crossreferential_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['crossreferential'])],summaryType='total'), 0) + + def test_transitions_illustrative_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total'), 6) + + def test_transitions_negative_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total'), 0) + + def test_transitions_emphatic_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total'), 2) + + def test_transitions_evidentiary_total(self): + 
self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total'), 0) + + def test_transitions_general_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total'), 0) + + def test_transitions_ordinal_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total'), 0) + + def test_transitions_purposive_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total'), 0) + + def test_transitions_periphrastic_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total'), 0) + + def test_transitions_hypothetical_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total'), 0) + + def test_transitions_summative_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['summative'])],summaryType='total'), 0) + + def test_transitions_introductory_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total'), 5) + + def test_pos_adj_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total'), 62) + + def test_pos_adv_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total'), 23) + + def test_pos_noun_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total'), 189) + + def 
test_pos_propn_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total'), 13) + + def test_pos_verb_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['VERB'])],summaryType='total'), 78) + + def test_pos_num_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total'), 2) + + def test_pos_adp_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total'), 81) + + def test_pos_cconj_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total'), 14) + + def test_pos_sconj_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total'), 17) + + def test_pos_aux_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total'), 36) + + def test_pos_pron_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total'), 22) + + def test_sentence_types_counts(self): + types_dict = json.loads(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) + self.assertEqual(sum(list(types_dict.values())), 35) + + def test_sentence_types_simple_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total'), 13) + + def test_sentence_types_simple_complex_pred_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total'), 3) + + def test_sentence_types_simple_compound_pred_total(self): + 
self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total'), 0) + + def test_sentence_types_simple_compound_complex_pred_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total'), 0) + + def test_sentence_types_compound_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total'), 2) + + def test_sentence_types_complex_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total'), 16) + + def test_sentence_types_compound_complex_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total'), 1) + + def test_vwp_source_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_source',summaryType='percent'), 0) + + def test_vwp_attribution_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent'), 0) + + def test_vwp_cite_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_cite',summaryType='percent'), 0) + + def test_vwp_quoted_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent'), 0) + + def test_vwp_direct_speech_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent'), 0) + + def test_vwp_in_direct_speech_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent'), 0) + + def test_vwp_tone_greater_percent(self): + 
self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent'), 1) + + def test_vwp_tone_lesser_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent'), 2) + + def test_concrete_details_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='concrete_details',summaryType='percent'), 2) + + def test_main_ideas_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total'), 9) + + def test_supporting_ideas_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total'), 11) + + def test_supporting_details_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total'), 6) + + def test_nSyll_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent'), 10) + + def test_max_freq_lesser_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent'), 9) + + def test_sents_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total'), 35) + + def test_delimiter_n_total(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total'), 223) + + def test_vwp_character_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='vwp_character',summaryType='percent'), 2) + + def test_in_past_tense_scope_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent'), 33) + + def test_vwp_propositional_attitudes_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent'), 
53) + + def test_vwp_social_awareness_percent(self): + self.assertEqual(self.__class__.doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent'), 3) + +# --- [ END ] --------------------------------------------------------------------------- From ff480f3a421f2b1b3dd7b8e2d07c30e279de18da Mon Sep 17 00:00:00 2001 From: cwscott3 Date: Thu, 12 Sep 2024 13:14:26 -0400 Subject: [PATCH 23/39] Updated modules --- examples/__init__.py | 0 examples/essays/__init__.py | 0 tests/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/__init__.py create mode 100644 examples/essays/__init__.py create mode 100644 tests/__init__.py diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/essays/__init__.py b/examples/essays/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 1c36adebdce64beb271055ce3ca6f1c0a8b15a87 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 19 Sep 2024 12:31:53 -0400 Subject: [PATCH 24/39] Removed holmes dependency --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ff6a86c..1a3dac4 100755 --- a/setup.cfg +++ b/setup.cfg @@ -33,7 +33,6 @@ install_requires = awe_spellcorrect awe_components awe_lexica - holmes_extractor websockets websocket-client aiohttp From 7e9b8db0dafce490d8c59fd1cea510ea40deb046 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 19 Sep 2024 13:32:08 -0400 Subject: [PATCH 25/39] Removed git conflict text --- awe_workbench/pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/awe_workbench/pipeline.py b/awe_workbench/pipeline.py index cbdf0b5..4989d72 100644 --- a/awe_workbench/pipeline.py +++ b/awe_workbench/pipeline.py @@ -40,7 +40,6 @@ # module that rougly identifies main ideas/supporting ideas/details in # argument-style texts. 
-<<<<<<< HEAD pipeline_def = [ { 'package': 'spacytextblob', From 9bda951d62195c4eb112a9ddb609fdacba63a987 Mon Sep 17 00:00:00 2001 From: arsalaan Date: Mon, 7 Oct 2024 13:31:50 -0400 Subject: [PATCH 26/39] Attempted removal of holmes from parserServer.py --- awe_workbench/web/oldParserServer.py | 1514 +++++++++ awe_workbench/web/parserServer.py | 2767 ++++++++--------- .../web/parserServer.py:Zone.Identifier | 3 + 3 files changed, 2770 insertions(+), 1514 deletions(-) create mode 100644 awe_workbench/web/oldParserServer.py create mode 100644 awe_workbench/web/parserServer.py:Zone.Identifier diff --git a/awe_workbench/web/oldParserServer.py b/awe_workbench/web/oldParserServer.py new file mode 100644 index 0000000..29ef26d --- /dev/null +++ b/awe_workbench/web/oldParserServer.py @@ -0,0 +1,1514 @@ +""" +--- [ Test: parserServer.py ] ----------------------------------------------------------- + +Main server for parsing commands regarding spacy pipeline. + +@8/20/2024: modifications made to function without the use of holmes-extractor; this is +to make AWE_Workbench far easier to manage regarding dependency issues. 
+ +Author: Caleb Scott (cwscott3@ncsu.edu) + +Copyright 2022, Educational Testing Service + +----------------------------------------------------------------------------------------- +""" + +# --- [ IMPORTS ] ----------------------------------------------------------------------- + +# Basic lib imports +import asyncio +import base64 +import websockets +import json +import spacy +import coreferee +import spacytextblob.spacytextblob + +# AWE imports +from awe_components.components.utility_functions import content_pos +import awe_components.components.lexicalFeatures +import awe_components.components.syntaxDiscourseFeats +import awe_components.components.viewpointFeatures +import awe_components.components.lexicalClusters +import awe_components.components.contentSegmentation +from awe_workbench.pipeline import pipeline_def + +# --- [ CONSTS/VARS ] ------------------------------------------------------------------- + +HOST = 'localhost' +PORT = 8766 +MAX_DATA_LIMIT = 2 ** 24 +SPACY_MODEL = 'en_core_web_lg' +COMPONENTS = [el['component'] for el in pipeline_def] +AWE_INFO_KEYS = ['indicator', 'infoType', 'summaryType', 'filters', 'transformations'] + +# --- [ CLASSES ] ----------------------------------------------------------------------- + +class parserServer: + + def __init__(self): + + # Set up the NLP pipeline + print("initializing NLP pipeline...") + try: + self.nlp = spacy.load(SPACY_MODEL) + for comp in COMPONENTS: + self.nlp.add_pipe(comp) + except OSError as e: + print("There was an error loading 'en_core_web_lg' from spacy.") + raise OSError() from e + + # Instead of using holmes, we save the docs in memory + self.docs = {} + self.partial = "" + + # Start the event loop, and run until the kill command + print("starting event loop -- use [KILL] command to terminate.") + asyncio.get_event_loop().run_until_complete( + websockets.serve(self.run_parser, HOST, PORT, max_size=MAX_DATA_LIMIT)) + print('parser server running...') + asyncio.get_event_loop().run_forever() 
+ print('parser server terminated...') + + async def kill(self, websocket): + """ + Command called to kill the parser server. + """ + self.parser.close() + await websocket.send(json.dumps(True)) + await websocket.close() + exit() + + def clear_parsed(self): + """ + Resets the document store to an empty mapping. + """ + self.docs = {} + return True + + def remove(self, label): + """ + Removes a document from the document store. + """ + del self.docs[label] + return True + + def parse_one(self, label, text): + """ + Parses a single document, and adds it to the document store. + + NOTE: we overwrite documents with the same key. + """ + self.docs[label] = self.partial + self.nlp(text) + self.partial = "" + print(f"parsed document: {label}") + return True + + def partial_text(self, text): + """ + Adds partial text to be processed in the future. + """ + self.partial += text + return True + + def parse_set(self, doc_set): + """ + Parses a document list of tuples (labels, text). + """ + for label, text in doc_set: + self.parse_one(label, text) + return True + + def labels(self): + """ + Returns a list of all document labels. + """ + return list(self.docs.keys()) + + def serialized(self, label): + """ + Returns a serialized document, selected by label + """ + return base64.b64encode(self.docs[label]) + + def new_search_phrase(self): + pass + + def remove_labeled_search(self): + pass + + def clear_searches(self): + pass + + def show_search_labels(self): + pass + + def match_documents(self): + pass + + def frequencies(self): + pass + + def topic_matches(self): + pass + + def awe_info(self, label, *args): + """ + Returns information specified in an AWE_Info object. 
+ + This information is determined by: + * indic - indicator name + * itype - information type + * summ - summary type + * filt - filters + * trans - transformations + """ + doc = self.docs[label] + kwargs = {} + + # Get the appropriate arguments for AWE_Info + # Since we have a list of values, we need to map them first + for i, val in enumerate(args): + kwargs[AWE_INFO_KEYS[i]] = val + if not kwargs: + return None + else: + return doc._.AWE_Info(**kwargs) + + def fast_map_awe_info(self, command): + """ + Maps to awe_info(), given a simple command + """ + pass + + def doc_heads(self, label): + """ + Returns list of token heads for a given document. + """ + doc = self.docs[label] + return [token.head.i for token in doc] + + def pos(self, label): + """ + Returns positions of tokens for a given document. + """ + doc = self.docs[label] + return [token.pos_ for token in doc] + + def doc_dependencies(self, label): + """ + Returns dependencies of tokens for a given document. + """ + doc = self.docs[label] + return [token.dep_ for token in doc] + + def doc_entities(self, label): + """ + Returns all entities for a given document. + """ + doc = self.docs[label] + return [ + [ + ent.text, + ent.start_char, + ent.end_char, + ent.label_ + ] for ent in doc.ents + ] + + def tok_vecs(self, label): + """ + Returns token vectors for a given document. 
+ """ + doc = self.docs[label] + return doc._.token_vectors + + summaryLabels = [ + 'mean_nSyll', + 'med_nSyll', + 'max_nSyll', + 'min_nSyll', + 'std_nSyll', + 'mean_sqnChars', + 'med_sqnChars', + 'max_sqnChars', + 'min_sqnChars', + 'std_sqnChars', + 'propn_latinate', + 'propn_academic', + 'mean_family_size', + 'med_family_size', + 'max_family_size', + 'min_family_size', + 'std_family_size', + 'mean_concreteness', + 'med_concreteness', + 'max_concreteness', + 'min_concreteness', + 'std_concreteness', + 'mean_logNSenses', + 'med_logNSenses', + 'max_logNSenses', + 'min_logNSenses', + 'std_logNSenses', + 'mean_nMorph', + 'med_nMorph', + 'max_nMorph', + 'min_nMorph', + 'std_nMorph', + 'mean_logfreq_HAL', + 'med_logfreq_HAL', + 'max_logfreq_HAL', + 'min_logfreq_HAL', + 'std_logfreq_HAL', + 'mean_root_fam_size', + 'med_root_fam_size', + 'max_root_fam_size', + 'min_root_fam_size', + 'std_root_fam_size', + 'mean_root_pfmf', + 'med_root_pfmf', + 'max_root_pfmf', + 'min_root_pfmf', + 'std_root_pfmf', + 'mean_token_frequency', + 'median_token_frequency', + 'max_token_frequency', + 'min_token_frequency', + 'std_token_frequency', + 'mean_lemma_frequency', + 'median_lemma_frequency', + 'max_lemma_frequency', + 'min_lemma_frequency', + 'std_lemma_frequency', + 'mean_max_frequency', + 'median_max_frequency', + 'max_max_frequency', + 'min_max_frequency', + 'std_max_frequency', + 'propn_abstract_traits', + 'propn_animates', + 'propn_deictics', + 'wf_type_count', + 'lemma_type_count', + 'type_count', + 'token_count', + 'paragraph_count', + 'mean_paragraph_length', + 'median_paragraph_length', + 'max_paragraph_length', + 'min_paragraph_length', + 'stdev_paragraph_length', + 'propn_transition_words', + 'transition_category_count', + 'transition_word_type_count', + 'mean_transition_distance', + 'median_transition_distance', + 'max_transition_distance', + 'min_transition_distance', + 'stdev_transition_distance', + 'mean_sent_cohesion', + 'median_sent_cohesion', + 'max_sent_cohesion', + 
'min_sent_cohesion', + 'stdev_sent_cohesion', + 'mean_slider_cohesion', + 'median_slider_cohesion', + 'max_slider_cohesion', + 'min_slider_cohesion', + 'stdev_slider_cohesion', + 'num_corefs', + 'mean_coref_chain_len', + 'median_coref_chain_len', + 'max_coref_chain_len', + 'min_coref_chain_len', + 'stdev_coref_chain_len', + 'sentence_count', + 'mean_sentence_len', + 'median_sentence_len', + 'max_sentence_len', + 'min_sentence_len', + 'std_sentence_len', + 'mean_words_to_sentence_root', + 'median_words_to_sentence_root', + 'max_words_to_sentence_root', + 'min_words_to_sentence_root', + 'stdev_words_to_sentence_root', + 'meanRhemeDepth', + 'medianRhemeDepth', + 'maxRhemeDepth', + 'minRhemeDepth', + 'stdevRhemeDepth', + 'meanThemeDepth', + 'medianThemeDepth', + 'maxThemeDepth', + 'minThemeDepth', + 'stdevThemeDepth', + 'meanWeightedDepth', + 'medianWeightedDepth', + 'maxWeightedDepth', + 'minWeightedDepth', + 'stdevWeightedDepth', + 'meanWeightedBreadth', + 'medianWeightedBreadth', + 'maxWeightedBreadth', + 'minWeightedBreadth', + 'stdevWeightedBreadth', + 'syntacticVariety', + 'propn_past', + 'propn_argument_words', + 'propn_direct_speech', + 'propn_egocentric', + 'propn_allocentric', + 'mean_subjectivity', + 'median_subjectivity', + 'min_subjectivity', + 'max_subjectivity', + 'stdev_subjectivity', + 'mean_polarity', + 'median_polarity', + 'min_polarity', + 'max_polarity', + 'stdev_polarity', + 'mean_sentiment', + 'median_sentiment', + 'min_sentiment', + 'max_sentiment', + 'stdev_sentiment', + 'mean_main_cluster_span', + 'median_main_cluster_span', + 'min_main_cluster_span', + 'max_main_cluster_span', + 'stdev_main_cluster_span', + 'propn_devwords', + 'mean_devword_nsyll', + 'median_devword_nsyll', + 'min_devword_nsyll', + 'max_devword_nsyll', + 'stdev_devword_nsyll', + 'mean_devword_nmorph', + 'median_devword_nmorph', + 'min_devword_nmorph', + 'max_devword_nmorph', + 'stdev_devword_nmorph', + 'mean_devword_nsenses', + 'median_devword_nsenses', + 
'min_devword_nsenses', + 'max_devword_nsenses', + 'stdev_devword_nsenses', + 'mean_devword_token_freq', + 'median_devword_token_freq', + 'min_devword_token_freq', + 'max_devword_token_freq', + 'stdev_devword_token_freq', + 'mean_devword_concreteness', + 'median_devword_concreteness', + 'min_devword_concreteness', + 'max_devword_concreteness', + 'stdev_devword_concreteness' + ] + + async def run_parser(self, websocket, path): + current_doc = '' + async for message in websocket: + + messagelist = json.loads(message) + print(messagelist) + command = '' + if messagelist[0] == 'KILL': + command = 'KILL' + await websocket.send(json.dumps(True)) + await self.kill(websocket) + elif messagelist[0] == 'CLEARPARSED': + command = 'CLEARPARSED' + self.parser.remove_all_documents() + await websocket.send(json.dumps(True)) + elif messagelist[0] == 'REMOVE': + command = 'REMOVE' + label = messagelist[1] + self.parser.remove_document(label) + await websocket.send(json.dumps(True)) + elif messagelist[0] == 'PARSEONE': + command = 'PARSEONE' + label = messagelist[1] + text = current_doc + messagelist[2] + current_doc = '' + if label in self.parser.list_document_labels(): + self.parser.remove_document(label) + self.parser.parse_and_register_document(text, label) + doc = self.parser.get_document(label) + await websocket.send(json.dumps(True)) + elif messagelist[0] == 'PARTIALTEXT': + current_document += messagelist[2] + elif messagelist[0] == 'PARSESET': + command = 'PARSESET' + results = [] + [labels, texts] = messagelist[1] + for i, text in enumerate(texts): + text = texts[i] + print('parsed document', str(i+1), 'of', len(texts)) + if text is not None and len(text) > 0: + if labels[i] in self.parser.list_document_labels(): + self.parser.remove_document(labels[i]) + self.parser.parse_and_register_document( + text, labels[i]) + await websocket.send(json.dumps(True)) + elif messagelist[0] == 'LABELS': + command = 'LABELS' + labels = self.parser.list_document_labels() + await 
websocket.send(json.dumps(labels)) + elif messagelist[0] == 'SERIALIZED': + command = 'SERIALIZED' + label = messagelist[1] + serialized = base64.b64encode( + self.parser.serialize_document(label)) + await websocket.send(serialized) + elif messagelist[0] == 'NEWSEARCHPHRASE': + command = 'NEWSEARCHPHRASE' + search_phrase_text = messagelist[1] + label = messagelist[2] + ok = self.parser.register_search_phrase(search_phrase_text) + await websocket.send(ok) + elif messagelist[0] == 'REMOVELABELEDSEARCH': + command = 'REMOVELABELEDSEARCH' + label = messagelist[1] + self.parser.remove_all_search_phrases_with_label(label) + await websocket.send(json.dumps(True)) + elif messagelist[0] == 'CLEARSEARCHES': + command = 'CLEARSEARCHES' + self.parser.remove_all_search_phrases() + await websocket.send(json.dumps(True)) + elif messagelist[0] == 'SHOWSEARCHLABELS': + command = 'SHOWSEARCHLABELS' + labels = self.parser.list_search_phrase_labels() + await websocket.send(json.dumps(labels)) + elif messagelist[0] == 'MATCH_DOCUMENTS': + command = 'MATCH_DOCUMENTS' + matches = self.parser.match() + await websocket.send(json.dumps(matches)) + elif messagelist[0] == 'FREQUENCIES': + command = 'FREQUENCIES' + freqinfo = self.parser.get_corpus_frequency_information() + await websocket.send(json.dumps(freqinfo)) + elif messagelist[0] == 'TOPIC_MATCHES': + command = 'TOPIC_MATCHES' + text_to_match = messagelist[1] + # This search takes a long list of keyword parameters, + # all of them with preset default thresholds. TBD: + # expose all of these parameters in more complex topic + # match functionality. Holmes extractor documentation + # describes what each of these parameters involves. 
+ matches = self.parser.topic_match_documents_against( + text_to_match, + word_embedding_match_threshold=.42, + relation_score=20, + reverse_only_relation_score=15, + single_word_score=10, + single_word_any_tag_score=5, + different_match_cutoff_score=10, + relation_matching_frequency_threshold=0.0, + embedding_matching_frequency_threshold=0.0, + use_frequency_factor=True) + await websocket.send(json.dumps(matches)) + # Holmes Extractor also has supervised topic model + # building facilities using the functions + # get_supervised_topic_training_basis(), + # and deserialize_supervised_topic_classifier(). + # TBD: Add support for Holmes supervised topic model + # building. + elif messagelist[0] == 'AWE_INFO': + label = messagelist[1] + doc = self.parser.get_document(label) + indic = None + itype = None + summ = None + filt = None + if len(messagelist) == 3: + indic = messagelist[2] + await websocket.send( + doc._.AWE_Info(indicator=indic)) + elif len(messagelist) == 4: + indic = messagelist[2] + itype = messagelist[3] + await websocket.send( + doc._.AWE_Info(indicator=indic,infoType=itype)) + elif len(messagelist) == 5: + indic = messagelist[2] + itype = messagelist[3] + summ = messagelist[4] + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ) + if type(result) in [int, float, bool]: + await websocket.send(str(result)) + else: + await websocket.send(result) + + elif len(messagelist) == 6: + indic = messagelist[2] + itype = messagelist[3] + summ = messagelist[4] + filt = json.loads(messagelist[5]) + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt) + if type(result) in [int, float]: + await websocket.send(str(result)) + else: + await websocket.send(result) + elif len(messagelist) == 7: + indic = messagelist[2] + itype = messagelist[3] + summ = messagelist[4] + filt = json.loads(messagelist[5]) + trans = json.loads(messagelist[6]) + result = \ + 
doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt,transformations=trans) + if type(result) in [int, float]: + await websocket.send(str(result)) + else: + await websocket.send(result) + + else: + await websocket.send(json.dumps([])) + elif messagelist[0] == 'DOCTOKENS': + label = messagelist[1] + doc = self.parser.get_document(label) + if doc is not None: + await websocket.send( + doc._.AWE_Info(indicator='text')) + else: + await websocket.send(json.dumps([])) + elif messagelist[0] == 'DOCTOKENS_WITH_WS': + label = messagelist[1] + doc = self.parser.get_document(label) + if doc is not None: + await websocket.send( + doc._.AWE_Info(indicator='text_with_ws')) + else: + await websocket.send(json.dumps([])) + elif messagelist[0] == 'DOCHEADS': + command = 'DOCHEADS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + heads = [token.head.i for token in doc] + await websocket.send(json.dumps(heads)) + elif messagelist[0] == 'POS': + command = 'POS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + heads = [token.pos_ for token in doc] + await websocket.send(json.dumps(heads)) + elif messagelist[0] == 'DOCDEPENDENCIES': + command = 'DOCDEPENDENCIES' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + deps = [token.dep_ for token in doc] + await websocket.send(json.dumps(deps)) + elif messagelist[0] == 'DOCENTITIES': + command = 'DOCENTITIES' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + ents = [[ent.text, + ent.start_char, + ent.end_char, + ent.label_] for ent in doc.ents] + await websocket.send(json.dumps(ents)) + elif messagelist[0] == 'TOKVECS': + command = 'TOKVECS' + # List returned contains lists 
pairing token + # offset with token vectors cast as strings + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.token_vectors)) + elif messagelist[0] == 'LEMMAS': + command = 'LEMMAS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(indicator='lemma_') + ) + elif messagelist[0] == 'STOPWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(indicator='is_stop') + ) + elif messagelist[0] == 'WORDTYPES': + command = 'WORDTYPES' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='lower_',filters=[('is_alpha', ['True']),('is_stop', ['False'])],summaryType = 'uniq') + )) + elif messagelist[0] == 'ROOTS': + command = 'ROOTS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root') + )) + elif messagelist[0] == 'SYLLABLES': + command = 'SYLLABLES' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nSyll'))) + elif messagelist[0] == 'WORDLENGTH': + command = 'WORDLENGTH' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt']) + )) + elif messagelist[0] == 'LATINATES': + command = 'LATINATES' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'ACADEMICS': + command = 'ACADEMICS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 
'SENSENUMS': + command = 'SENSENUMS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'LOGSENSENUMS': + command = 'LOGSENSENUMS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])],transformations=['log']) + )) + elif messagelist[0] == 'MORPHOLOGY': + command = 'MORPHOLOGY' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='morphology') + )) + elif messagelist[0] == 'MORPHNUMS': + command = 'MORPHNUMS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nMorph',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'HALROOTFREQS': + command = 'HALROOTFREQS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'HALLOGROOTFREQS': + command = 'HALLOGROOTFREQS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log']) + )) + elif messagelist[0] == 'ROOTFAMSIZES': + command = 'ROOTFAMSIZES' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'ROOTPFMFS': + command = 'ROOTPFMFS' + # Position in the list returned equals position + # in the document + label = 
messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'FAMILYSIZES': + command = 'FAMILYSIZES' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='family_size',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'TOKFREQS': + command = 'TOKFREQS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])]) + )) + elif messagelist[0] == 'LEMMAFREQS': + command = 'LEMMAfREQS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='lemma_freq'))) + elif messagelist[0] == 'ROOTFREQS': + command = 'ROOTFREQS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root_Freq'))) + elif messagelist[0] == 'MAXFREQS': + command = 'MAXFREQS' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='max_freq'))) + elif messagelist[0] == 'CONCRETES': + command = 'CONCRETES' + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='concreteness'))) + elif messagelist[0] == 'ABSTRACTTRAITS': + command = 'ABSTRACTTRAITS' + # Position in the list returned 
equals position + # in the document. Flag 1 if the word names an + # abstract trait, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='abstract_trait'))) + elif messagelist[0] == 'ANIMATES': + command = 'ANIMATES' + # Position in the list returned equals position + # in the document. Flag 1 if the word names an animate + # entity, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='animate'))) + elif messagelist[0] == 'LOCATIONS': + command = 'LOCATIONS' + # Position in the list returned equals position + # in the document. Flag 1 if the word names an + # animate entity, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='location'))) + elif messagelist[0] == 'DEICTICS': + command = 'DEICTICS' + # Position in the list returned equals position in + # the document. 
Flag 1 if the word names a deictic + # element, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='deictic'))) + elif messagelist[0] == 'PARAGRAPHS': + command = 'PARAGRAPHS' + # Items in the list indicate word offsets in the document + # at which paragraph breaks appear + label = messagelist[1] + doc = self.parser.get_document(label) + + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='delimiter_n') + )) + # doc._.paragraph_breaks)) + elif messagelist[0] == 'SENTENCES': + command = 'SENTENCES' + # Items in the list indicate word offsets in the document + # at which paragraph breaks appear + label = messagelist[1] + doc = self.parser.get_document(label) + + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='sents') + )) + #await websocket.send(json.dumps( + # [(sent.start, sent.end) for sent in doc.sents])) + elif messagelist[0] == 'PARAGRAPHLENS': + command = 'PARAGRAPHLENS' + # Items in the list indicate lengths of paragraphs listed + # by offset in GETPARAGRAPHS + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen']) + )) + elif messagelist[0] == 'TRANSITIONPROFILE': + command = 'TRANSITIONPROFILE' + # Returns a rich data structure in a list containing + # (1) total number of transition words in the document + # (2) a dictionary that lists the frequency of a predefined + # set of transition word categories. + # (3) a dictionary that lists the frequency of individual + # transition words + # (4) a list of lists that provides for each transition + # word the word string, its start and stop offsets, + # and its transition word category. 
+ label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.transition_word_profile)) + elif messagelist[0] == 'TRANSITIONS': + command = 'TRANSITIONS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='transitions') + )) + elif messagelist[0] == 'TRANSITIONDISTANCES': + command = 'TRANSITIONDISTANCES' + # List of cosine distances between ten-word windows + # before and after a transition + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='transition_distances') + )) + elif messagelist[0] == 'SENTENCECOHESIONS': + command = 'SENTENCECOHESIONS' + # List of cosine distances between ten-word windows + # before and after a sentence boundary + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='intersentence_cohesions') + )) + elif messagelist[0] == 'SLIDERCOHESIONS': + command = 'SLIDERCOHESIONS' + # List of cosine distances between ten-word windows + # before and after a sliding window through the text + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='sliding_window_cohesions') + )) + elif messagelist[0] == 'COREFCHAINS': + command = 'COREFCHAINS' + # List of coreference chains found in document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.coref_chains)) + elif messagelist[0] == 'RHEMEDEPTHS': + command = 'RHEMEDEPTHS' + # Syntactic depth of the sentence rheme -- part of + # sentence after the main verb where new information + # is usually placed + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + 
doc._.AWE_Info(infoType='Doc',indicator='syntacticDepthsOfRhemes') + )) + elif messagelist[0] == 'THEMEDEPTHS': + command = 'THEMEDEPTHS' + # Syntactic depth of the sentence theme -- part + # of sentence before the main verb where given + # information is usually placed + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='syntacticDepthsOfThemes') + )) + elif messagelist[0] == 'WEIGHTEDDEPTHS': + command = 'WEIGHTEDDEPTHS' + # Syntactic depth weighted to penalize + # left-embedded structures + # that tend to be harder to process + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='weightedSyntacticDepths') + )) + elif messagelist[0] == 'WEIGHTEDBREADTHS': + command = 'WEIGHTEDBREADTHS' + # Syntactic breadth -- measure of extent to which sentence + # structure is additive, consisting of coordinated + # structures and loosely appended modifiers typical of + # spoken, often unplanned sentence production + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='weightedSyntacticBreadths') + )) + elif messagelist[0] == 'SENTENCETYPES': + # tuple giving number and location of sentence types + # format: + # (1,1,1,1,[1,2,3,4]) would be the record for a text that + # had four sentences -- simple sentence, compound sentence, + # complex sentence, and compound/complex sentence, in + # that order. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='sentence_types') + )) + elif messagelist[0] == 'SYNTACTICPROFILE': + command = 'SYNTACTICPROFILE' + # Returns a dictionary containing frequency information + # about the syntactic relations and categories in the text. 
+ # This includes information about the frequency of parts + # of speech, morphological categories, and syntactic + # dependencies between specific parts of speech. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.syntacticProfile)) + elif messagelist[0] == 'NORMEDSYNTACTICPROFILE': + command = 'NORMEDSYNTACTICPROFILE' + # Returns a dictionary containing normalized + # frequency information (proportionas) for the + # syntactic relations and categories in the text. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.syntacticProfileNormed)) + elif messagelist[0] == 'QUOTEDTEXT': + command = 'QUOTEDTEXT' + # 1 for tokens within quotation marks, 0 for other text + # Position in the list corresponds to offset of token + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_quoted') + )) + elif messagelist[0] == 'DIRECTSPEECHSPANS': + command = 'DIRECTSPEECHSPANS' + # Data about subset of quoted text -- specifically, + # quoted text that is attributed to a specific + # speaker. + # + # Returns a list of lists with three top level + # elements: + # + # 1. Speaker: a list of offsets to tokens + # referring to the speaker(s) + # 2. Addressee: a list of offsets to tokens + # referring to the person(s) spoken to. + # 3. Span start offset + # 4. Span end offset. + # + # Note that first and second person pronouns + # inside direct speech may reference a person + # explicitly identified in the direct speech + # framing text. Coreferee reference resolution + # may apply, so that the speaker and addressee + # references may be to a proper noun at the head + # of a pronominal reference chain that includes + # the direct speech frame. 
+ label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech') + )) + elif messagelist[0] == 'IN_DIRECT_SPEECH': + # 1 for tokens within quoted stretches of direct speech, + # 0 for other text. Position in the list corresponds to + # offset of token in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_in_direct_speech') + )) + elif messagelist[0] == 'TENSECHANGES': + # list of positions where tense changed in the main + # document flow (not in direct speech/quotations, + # with flag to indicate whether shift was to past + # tense or to present tense. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.vwp_tense_changes)) + elif messagelist[0] == 'PERSPECTIVES': + # list of positions where perspective is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_perspective') + )) + elif messagelist[0] == 'ATTRIBUTIONS': + # list of positions where attribution is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_attribution') + )) + elif messagelist[0] == 'SOURCES': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_source') + )) + elif messagelist[0] == 'CITES': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_cite') + )) + elif messagelist[0] == 'STATEMENTSOFFACT': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await 
websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_fact') + )) + elif messagelist[0] == 'STATEMENTSOFOPINION': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_opinion') + )) + elif messagelist[0] == 'PERSPECTIVESPANS': + command = 'PERSPECTIVESPANS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.vwp_perspective_spans)) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='vwp_perspective_spans') + ) + elif messagelist[0] == 'STANCEMARKERS': + command = 'STANCEMARKERS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.vwp_stance_markers)) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='vwp_stance_markers') + ) + + elif messagelist[0] == 'CLAIMTEXTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_claim') + )) + + elif messagelist[0] == 'DISCUSSIONTEXTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_discussion') + )) + + elif messagelist[0] == 'EMOTIONWORDS': + command = 'EMOTIONWORDS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_emotionword') + )) + + elif messagelist[0] == 'CHARACTERWORDS': + command = 'CHARACTERWORDS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_character_traits') + )) + + elif messagelist[0] == 'EMOTIONALSTATES': + command = 'EMOTIONALSTATES' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + 
doc._.AWE_Info(infoType="Doc",indicator='vwp_emotion_states') + ) + elif messagelist[0] == 'CHARACTERTRAITS': + command = 'CHARACTERTRAITS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.vwp_character_traits)) + elif messagelist[0] == 'PROPOSITIONALATTITUDES': + command = 'PROPOSITIONALATTITUDES' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_propositional_attitudes') + )) + elif messagelist[0] == 'SOCIAL_AWARENESS': + command = 'SOCIAL_AWARENESS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_social_awareness') + )) + elif messagelist[0] == 'CONCRETEDETAILS': + command = 'CONCRETEDETAILS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(indicator='concrete_detail') + ) + elif messagelist[0] == 'INTERACTIVELANGUAGE': + command = 'INTERACTIVELANGUAGE' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_interactive') + )) + elif messagelist[0] == 'ARGUMENTWORDS': + command = 'ARGUMENTWORDS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_argumentword') + )) + elif messagelist[0] == 'ARGUMENTLANGUAGE': + command = 'ARGUMENTLANGUAGE' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_argumentation') + )) + elif messagelist[0] == 'EXPLICITARGUMENTWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_explicit_argument') + )) + elif messagelist[0] == 'SUBJECTIVITYRATINGS': + command = 'SUBJECTIVITYRATINGS' + label = messagelist[1] + 
doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='subjectivity') + )) + elif messagelist[0] == 'SENTIMENTRATINGS': + command = 'SENTIMENTRATINGS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_sentiment') + )) + elif messagelist[0] == 'TONERATINGS': + command = 'TONERATINGS2' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_tone') + )) + elif messagelist[0] == 'POLARITYRATINGS': + command = 'POLARITYRATINGS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='polarity') + )) + elif messagelist[0] == 'ASSESSMENTS': + command = 'ASSESSMENTS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.assessments)) + elif messagelist[0] == 'PASTTENSESCOPE': + command = 'PASTTENSESCOPE' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='in_past_tense_scope') + )) + elif messagelist[0] == 'GOVERNINGSUBJECTS': + command = 'GOVERNINGSUBJECTS' + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='governing_subject') + )) + elif messagelist[0] == 'CLUSTERS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='clusterID') + )) + elif messagelist[0] == 'PROMPTLANGUAGE': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.prompt_language)) + elif messagelist[0] == 'PROMPTRELATED': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.prompt_related)) + elif messagelist[0] == 'MAINIDEAS': + label = messagelist[1] + doc 
= self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='main_ideas') + ) + elif messagelist[0] == 'SUPPORTINGIDEAS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='supporting_ideas') + ) + elif messagelist[0] == 'SUPPORTINGDETAILS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='supporting_details') + ) + elif messagelist[0] == 'CLUSTERINFO': + command = 'CLUSTERINFO' + # Get the local word clusters our algorithm has + # clustered the words of the student document into + # + # The data is a list of records in this format: + # 1. The clusterID. + # 2. The cluster rating, which is roughly a measure + # of how important the cluster seems to be in the + # docyument as measured by the number of words in it + # and their relative infrequency + # 3. A list of the actual word strings in each cluster + # 4. 
The offsets of the words assigned to each cluster + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.clusterInfo)) + elif messagelist[0] == 'DEVWORDS': + command = 'DEVWORDS' + # offset of the logical subject that governs + # the domain this token belongs to + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='devword') + )) + elif messagelist[0] == 'NOMINALREFERENCES': + command = 'NOMINALREFERENCES' + # offset of the logical subject that governs + # the domain this token belongs to + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.nominalReferences)) + elif messagelist[0] == 'DOCSUMMARYLABELS': + command = 'DOCSUMMARYLABELS' + await websocket.send(json.dumps(self.summaryLabels)) + elif messagelist[0] == 'DOCSUMMARYFEATS': + command = 'DOCSUMMARYFEATS' + label = messagelist[1] + doc = self.parser.get_document(label) + summaryFeats = [ + doc._.AWE_Info(indicator='nSyll',summaryType="mean"), + doc._.AWE_Info(indicator='nSyll',summaryType="median"), + doc._.AWE_Info(indicator='nSyll',summaryType="max"), + doc._.AWE_Info(indicator='nSyll',summaryType="min"), + doc._.AWE_Info(indicator='nSyll',summaryType="stdev"), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='mean'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='median'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='max'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='min'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='stdev'), + doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', 
['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='mean'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='median'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='max'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='min'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='stdev'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='max'), + 
doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='mean'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='median'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='max'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='min'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='stdev'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', 
['True'])],summaryType='max'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='max_freq',summaryType='mean'), + doc._.AWE_Info(indicator='max_freq',summaryType='median'), + doc._.AWE_Info(indicator='max_freq',summaryType='max'), + doc._.AWE_Info(indicator='max_freq',summaryType='min'), + doc._.AWE_Info(indicator='max_freq',summaryType='stdev'), + doc._.AWE_Info(indicator='abstract_trait',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='animate',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='deictic',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='root', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='lemma_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='lower_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(infoType="Doc",indicator='delimiter_n',summaryType='total'), + 
doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='proportion'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='total'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',transformations=['text'],summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='max'), + 
doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sents',summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='mean'), + 
doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='stdev'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='mean'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='median'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='max'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='min'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='stdev'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='mean'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='median'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='max'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='min'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='stdev'), + doc._.syntacticVariety, + doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_argumentation',summaryType='proportion'), + doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_egocentric',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_allocentric',summaryType='proportion'), + 
doc._.AWE_Info(indicator='subjectivity',summaryType='mean'), + doc._.AWE_Info(indicator='subjectivity',summaryType='median'), + doc._.AWE_Info(indicator='subjectivity',summaryType='min'), + doc._.AWE_Info(indicator='subjectivity',summaryType='max'), + doc._.AWE_Info(indicator='subjectivity',summaryType='stdev'), + doc._.AWE_Info(indicator='polarity',summaryType='mean'), + doc._.AWE_Info(indicator='polarity',summaryType='median'), + doc._.AWE_Info(indicator='polarity',summaryType='min'), + doc._.AWE_Info(indicator='polarity',summaryType='max'), + doc._.AWE_Info(indicator='polarity',summaryType='stdev'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='mean'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='median'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='min'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='max'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(indicator='devword', summaryType='proportion'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + 
doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + 
doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev') + ] + await websocket.send(json.dumps(summaryFeats)) + else: + await websocket.send(False) + +# --- [ MAIN ] -------------------------------------------------------------------------- + +if __name__ == '__main__': + print('parser server loading') + wsc = parserServer() + +# --- [ END ] --------------------------------------------------------------------------- diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index 29ef26d..6750638 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -1,1514 +1,1253 @@ -""" ---- [ Test: parserServer.py ] ----------------------------------------------------------- - -Main server for parsing commands regarding spacy pipeline. - -@8/20/2024: modifications made to function without the use of holmes-extractor; this is -to make AWE_Workbench far easier to manage regarding dependency issues. 
- -Author: Caleb Scott (cwscott3@ncsu.edu) - -Copyright 2022, Educational Testing Service - ------------------------------------------------------------------------------------------ -""" - -# --- [ IMPORTS ] ----------------------------------------------------------------------- - -# Basic lib imports -import asyncio -import base64 -import websockets -import json -import spacy -import coreferee -import spacytextblob.spacytextblob - -# AWE imports -from awe_components.components.utility_functions import content_pos -import awe_components.components.lexicalFeatures -import awe_components.components.syntaxDiscourseFeats -import awe_components.components.viewpointFeatures -import awe_components.components.lexicalClusters -import awe_components.components.contentSegmentation -from awe_workbench.pipeline import pipeline_def - -# --- [ CONSTS/VARS ] ------------------------------------------------------------------- - -HOST = 'localhost' -PORT = 8766 -MAX_DATA_LIMIT = 2 ** 24 -SPACY_MODEL = 'en_core_web_lg' -COMPONENTS = [el['component'] for el in pipeline_def] -AWE_INFO_KEYS = ['indicator', 'infoType', 'summaryType', 'filters', 'transformations'] - -# --- [ CLASSES ] ----------------------------------------------------------------------- - -class parserServer: - - def __init__(self): - - # Set up the NLP pipeline - print("initializing NLP pipeline...") - try: - self.nlp = spacy.load(SPACY_MODEL) - for comp in COMPONENTS: - self.nlp.add_pipe(comp) - except OSError as e: - print("There was an error loading 'en_core_web_lg' from spacy.") - raise OSError() from e - - # Instead of using holmes, we save the docs in memory - self.docs = {} - self.partial = "" - - # Start the event loop, and run until the kill command - print("starting event loop -- use [KILL] command to terminate.") - asyncio.get_event_loop().run_until_complete( - websockets.serve(self.run_parser, HOST, PORT, max_size=MAX_DATA_LIMIT)) - print('parser server running...') - asyncio.get_event_loop().run_forever() 
- print('parser server terminated...') - - async def kill(self, websocket): - """ - Command called to kill the parser server. - """ - self.parser.close() - await websocket.send(json.dumps(True)) - await websocket.close() - exit() - - def clear_parsed(self): - """ - Resets the document store to an empty mapping. - """ - self.docs = {} - return True - - def remove(self, label): - """ - Removes a document from the document store. - """ - del self.docs[label] - return True - - def parse_one(self, label, text): - """ - Parses a single document, and adds it to the document store. - - NOTE: we overwrite documents with the same key. - """ - self.docs[label] = self.partial + self.nlp(text) - self.partial = "" - print(f"parsed document: {label}") - return True - - def partial_text(self, text): - """ - Adds partial text to be processed in the future. - """ - self.partial += text - return True - - def parse_set(self, doc_set): - """ - Parses a document list of tuples (labels, text). - """ - for label, text in doc_set: - self.parse_one(label, text) - return True - - def labels(self): - """ - Returns a list of all document labels. - """ - return list(self.docs.keys()) - - def serialized(self, label): - """ - Returns a serialized document, selected by label - """ - return base64.b64encode(self.docs[label]) - - def new_search_phrase(self): - pass - - def remove_labeled_search(self): - pass - - def clear_searches(self): - pass - - def show_search_labels(self): - pass - - def match_documents(self): - pass - - def frequencies(self): - pass - - def topic_matches(self): - pass - - def awe_info(self, label, *args): - """ - Returns information specified in an AWE_Info object. 
- - This information is determined by: - * indic - indicator name - * itype - information type - * summ - summary type - * filt - filters - * trans - transformations - """ - doc = self.docs[label] - kwargs = {} - - # Get the appropriate arguments for AWE_Info - # Since we have a list of values, we need to map them first - for i, val in enumerate(args): - kwargs[AWE_INFO_KEYS[i]] = val - if not kwargs: - return None - else: - return doc._.AWE_Info(**kwargs) - - def fast_map_awe_info(self, command): - """ - Maps to awe_info(), given a simple command - """ - pass - - def doc_heads(self, label): - """ - Returns list of token heads for a given document. - """ - doc = self.docs[label] - return [token.head.i for token in doc] - - def pos(self, label): - """ - Returns positions of tokens for a given document. - """ - doc = self.docs[label] - return [token.pos_ for token in doc] - - def doc_dependencies(self, label): - """ - Returns dependencies of tokens for a given document. - """ - doc = self.docs[label] - return [token.dep_ for token in doc] - - def doc_entities(self, label): - """ - Returns all entities for a given document. - """ - doc = self.docs[label] - return [ - [ - ent.text, - ent.start_char, - ent.end_char, - ent.label_ - ] for ent in doc.ents - ] - - def tok_vecs(self, label): - """ - Returns token vectors for a given document. 
- """ - doc = self.docs[label] - return doc._.token_vectors - - summaryLabels = [ - 'mean_nSyll', - 'med_nSyll', - 'max_nSyll', - 'min_nSyll', - 'std_nSyll', - 'mean_sqnChars', - 'med_sqnChars', - 'max_sqnChars', - 'min_sqnChars', - 'std_sqnChars', - 'propn_latinate', - 'propn_academic', - 'mean_family_size', - 'med_family_size', - 'max_family_size', - 'min_family_size', - 'std_family_size', - 'mean_concreteness', - 'med_concreteness', - 'max_concreteness', - 'min_concreteness', - 'std_concreteness', - 'mean_logNSenses', - 'med_logNSenses', - 'max_logNSenses', - 'min_logNSenses', - 'std_logNSenses', - 'mean_nMorph', - 'med_nMorph', - 'max_nMorph', - 'min_nMorph', - 'std_nMorph', - 'mean_logfreq_HAL', - 'med_logfreq_HAL', - 'max_logfreq_HAL', - 'min_logfreq_HAL', - 'std_logfreq_HAL', - 'mean_root_fam_size', - 'med_root_fam_size', - 'max_root_fam_size', - 'min_root_fam_size', - 'std_root_fam_size', - 'mean_root_pfmf', - 'med_root_pfmf', - 'max_root_pfmf', - 'min_root_pfmf', - 'std_root_pfmf', - 'mean_token_frequency', - 'median_token_frequency', - 'max_token_frequency', - 'min_token_frequency', - 'std_token_frequency', - 'mean_lemma_frequency', - 'median_lemma_frequency', - 'max_lemma_frequency', - 'min_lemma_frequency', - 'std_lemma_frequency', - 'mean_max_frequency', - 'median_max_frequency', - 'max_max_frequency', - 'min_max_frequency', - 'std_max_frequency', - 'propn_abstract_traits', - 'propn_animates', - 'propn_deictics', - 'wf_type_count', - 'lemma_type_count', - 'type_count', - 'token_count', - 'paragraph_count', - 'mean_paragraph_length', - 'median_paragraph_length', - 'max_paragraph_length', - 'min_paragraph_length', - 'stdev_paragraph_length', - 'propn_transition_words', - 'transition_category_count', - 'transition_word_type_count', - 'mean_transition_distance', - 'median_transition_distance', - 'max_transition_distance', - 'min_transition_distance', - 'stdev_transition_distance', - 'mean_sent_cohesion', - 'median_sent_cohesion', - 'max_sent_cohesion', - 
'min_sent_cohesion', - 'stdev_sent_cohesion', - 'mean_slider_cohesion', - 'median_slider_cohesion', - 'max_slider_cohesion', - 'min_slider_cohesion', - 'stdev_slider_cohesion', - 'num_corefs', - 'mean_coref_chain_len', - 'median_coref_chain_len', - 'max_coref_chain_len', - 'min_coref_chain_len', - 'stdev_coref_chain_len', - 'sentence_count', - 'mean_sentence_len', - 'median_sentence_len', - 'max_sentence_len', - 'min_sentence_len', - 'std_sentence_len', - 'mean_words_to_sentence_root', - 'median_words_to_sentence_root', - 'max_words_to_sentence_root', - 'min_words_to_sentence_root', - 'stdev_words_to_sentence_root', - 'meanRhemeDepth', - 'medianRhemeDepth', - 'maxRhemeDepth', - 'minRhemeDepth', - 'stdevRhemeDepth', - 'meanThemeDepth', - 'medianThemeDepth', - 'maxThemeDepth', - 'minThemeDepth', - 'stdevThemeDepth', - 'meanWeightedDepth', - 'medianWeightedDepth', - 'maxWeightedDepth', - 'minWeightedDepth', - 'stdevWeightedDepth', - 'meanWeightedBreadth', - 'medianWeightedBreadth', - 'maxWeightedBreadth', - 'minWeightedBreadth', - 'stdevWeightedBreadth', - 'syntacticVariety', - 'propn_past', - 'propn_argument_words', - 'propn_direct_speech', - 'propn_egocentric', - 'propn_allocentric', - 'mean_subjectivity', - 'median_subjectivity', - 'min_subjectivity', - 'max_subjectivity', - 'stdev_subjectivity', - 'mean_polarity', - 'median_polarity', - 'min_polarity', - 'max_polarity', - 'stdev_polarity', - 'mean_sentiment', - 'median_sentiment', - 'min_sentiment', - 'max_sentiment', - 'stdev_sentiment', - 'mean_main_cluster_span', - 'median_main_cluster_span', - 'min_main_cluster_span', - 'max_main_cluster_span', - 'stdev_main_cluster_span', - 'propn_devwords', - 'mean_devword_nsyll', - 'median_devword_nsyll', - 'min_devword_nsyll', - 'max_devword_nsyll', - 'stdev_devword_nsyll', - 'mean_devword_nmorph', - 'median_devword_nmorph', - 'min_devword_nmorph', - 'max_devword_nmorph', - 'stdev_devword_nmorph', - 'mean_devword_nsenses', - 'median_devword_nsenses', - 
'min_devword_nsenses', - 'max_devword_nsenses', - 'stdev_devword_nsenses', - 'mean_devword_token_freq', - 'median_devword_token_freq', - 'min_devword_token_freq', - 'max_devword_token_freq', - 'stdev_devword_token_freq', - 'mean_devword_concreteness', - 'median_devword_concreteness', - 'min_devword_concreteness', - 'max_devword_concreteness', - 'stdev_devword_concreteness' - ] - - async def run_parser(self, websocket, path): - current_doc = '' - async for message in websocket: - - messagelist = json.loads(message) - print(messagelist) - command = '' - if messagelist[0] == 'KILL': - command = 'KILL' - await websocket.send(json.dumps(True)) - await self.kill(websocket) - elif messagelist[0] == 'CLEARPARSED': - command = 'CLEARPARSED' - self.parser.remove_all_documents() - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'REMOVE': - command = 'REMOVE' - label = messagelist[1] - self.parser.remove_document(label) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'PARSEONE': - command = 'PARSEONE' - label = messagelist[1] - text = current_doc + messagelist[2] - current_doc = '' - if label in self.parser.list_document_labels(): - self.parser.remove_document(label) - self.parser.parse_and_register_document(text, label) - doc = self.parser.get_document(label) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'PARTIALTEXT': - current_document += messagelist[2] - elif messagelist[0] == 'PARSESET': - command = 'PARSESET' - results = [] - [labels, texts] = messagelist[1] - for i, text in enumerate(texts): - text = texts[i] - print('parsed document', str(i+1), 'of', len(texts)) - if text is not None and len(text) > 0: - if labels[i] in self.parser.list_document_labels(): - self.parser.remove_document(labels[i]) - self.parser.parse_and_register_document( - text, labels[i]) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'LABELS': - command = 'LABELS' - labels = self.parser.list_document_labels() - await 
websocket.send(json.dumps(labels)) - elif messagelist[0] == 'SERIALIZED': - command = 'SERIALIZED' - label = messagelist[1] - serialized = base64.b64encode( - self.parser.serialize_document(label)) - await websocket.send(serialized) - elif messagelist[0] == 'NEWSEARCHPHRASE': - command = 'NEWSEARCHPHRASE' - search_phrase_text = messagelist[1] - label = messagelist[2] - ok = self.parser.register_search_phrase(search_phrase_text) - await websocket.send(ok) - elif messagelist[0] == 'REMOVELABELEDSEARCH': - command = 'REMOVELABELEDSEARCH' - label = messagelist[1] - self.parser.remove_all_search_phrases_with_label(label) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'CLEARSEARCHES': - command = 'CLEARSEARCHES' - self.parser.remove_all_search_phrases() - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'SHOWSEARCHLABELS': - command = 'SHOWSEARCHLABELS' - labels = self.parser.list_search_phrase_labels() - await websocket.send(json.dumps(labels)) - elif messagelist[0] == 'MATCH_DOCUMENTS': - command = 'MATCH_DOCUMENTS' - matches = self.parser.match() - await websocket.send(json.dumps(matches)) - elif messagelist[0] == 'FREQUENCIES': - command = 'FREQUENCIES' - freqinfo = self.parser.get_corpus_frequency_information() - await websocket.send(json.dumps(freqinfo)) - elif messagelist[0] == 'TOPIC_MATCHES': - command = 'TOPIC_MATCHES' - text_to_match = messagelist[1] - # This search takes a long list of keyword parameters, - # all of them with preset default thresholds. TBD: - # expose all of these parameters in more complex topic - # match functionality. Holmes extractor documentation - # describes what each of these parameters involves. 
- matches = self.parser.topic_match_documents_against( - text_to_match, - word_embedding_match_threshold=.42, - relation_score=20, - reverse_only_relation_score=15, - single_word_score=10, - single_word_any_tag_score=5, - different_match_cutoff_score=10, - relation_matching_frequency_threshold=0.0, - embedding_matching_frequency_threshold=0.0, - use_frequency_factor=True) - await websocket.send(json.dumps(matches)) - # Holmes Extractor also has supervised topic model - # building facilities using the functions - # get_supervised_topic_training_basis(), - # and deserialize_supervised_topic_classifier(). - # TBD: Add support for Holmes supervised topic model - # building. - elif messagelist[0] == 'AWE_INFO': - label = messagelist[1] - doc = self.parser.get_document(label) - indic = None - itype = None - summ = None - filt = None - if len(messagelist) == 3: - indic = messagelist[2] - await websocket.send( - doc._.AWE_Info(indicator=indic)) - elif len(messagelist) == 4: - indic = messagelist[2] - itype = messagelist[3] - await websocket.send( - doc._.AWE_Info(indicator=indic,infoType=itype)) - elif len(messagelist) == 5: - indic = messagelist[2] - itype = messagelist[3] - summ = messagelist[4] - result = \ - doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ) - if type(result) in [int, float, bool]: - await websocket.send(str(result)) - else: - await websocket.send(result) - - elif len(messagelist) == 6: - indic = messagelist[2] - itype = messagelist[3] - summ = messagelist[4] - filt = json.loads(messagelist[5]) - result = \ - doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt) - if type(result) in [int, float]: - await websocket.send(str(result)) - else: - await websocket.send(result) - elif len(messagelist) == 7: - indic = messagelist[2] - itype = messagelist[3] - summ = messagelist[4] - filt = json.loads(messagelist[5]) - trans = json.loads(messagelist[6]) - result = \ - 
doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt,transformations=trans) - if type(result) in [int, float]: - await websocket.send(str(result)) - else: - await websocket.send(result) - - else: - await websocket.send(json.dumps([])) - elif messagelist[0] == 'DOCTOKENS': - label = messagelist[1] - doc = self.parser.get_document(label) - if doc is not None: - await websocket.send( - doc._.AWE_Info(indicator='text')) - else: - await websocket.send(json.dumps([])) - elif messagelist[0] == 'DOCTOKENS_WITH_WS': - label = messagelist[1] - doc = self.parser.get_document(label) - if doc is not None: - await websocket.send( - doc._.AWE_Info(indicator='text_with_ws')) - else: - await websocket.send(json.dumps([])) - elif messagelist[0] == 'DOCHEADS': - command = 'DOCHEADS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - heads = [token.head.i for token in doc] - await websocket.send(json.dumps(heads)) - elif messagelist[0] == 'POS': - command = 'POS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - heads = [token.pos_ for token in doc] - await websocket.send(json.dumps(heads)) - elif messagelist[0] == 'DOCDEPENDENCIES': - command = 'DOCDEPENDENCIES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - deps = [token.dep_ for token in doc] - await websocket.send(json.dumps(deps)) - elif messagelist[0] == 'DOCENTITIES': - command = 'DOCENTITIES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - ents = [[ent.text, - ent.start_char, - ent.end_char, - ent.label_] for ent in doc.ents] - await websocket.send(json.dumps(ents)) - elif messagelist[0] == 'TOKVECS': - command = 'TOKVECS' - # List returned contains lists 
pairing token - # offset with token vectors cast as strings - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.token_vectors)) - elif messagelist[0] == 'LEMMAS': - command = 'LEMMAS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(indicator='lemma_') - ) - elif messagelist[0] == 'STOPWORDS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(indicator='is_stop') - ) - elif messagelist[0] == 'WORDTYPES': - command = 'WORDTYPES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='lower_',filters=[('is_alpha', ['True']),('is_stop', ['False'])],summaryType = 'uniq') - )) - elif messagelist[0] == 'ROOTS': - command = 'ROOTS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root') - )) - elif messagelist[0] == 'SYLLABLES': - command = 'SYLLABLES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSyll'))) - elif messagelist[0] == 'WORDLENGTH': - command = 'WORDLENGTH' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt']) - )) - elif messagelist[0] == 'LATINATES': - command = 'LATINATES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'ACADEMICS': - command = 'ACADEMICS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 
'SENSENUMS': - command = 'SENSENUMS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'LOGSENSENUMS': - command = 'LOGSENSENUMS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])],transformations=['log']) - )) - elif messagelist[0] == 'MORPHOLOGY': - command = 'MORPHOLOGY' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='morphology') - )) - elif messagelist[0] == 'MORPHNUMS': - command = 'MORPHNUMS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nMorph',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'HALROOTFREQS': - command = 'HALROOTFREQS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'HALLOGROOTFREQS': - command = 'HALLOGROOTFREQS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log']) - )) - elif messagelist[0] == 'ROOTFAMSIZES': - command = 'ROOTFAMSIZES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'ROOTPFMFS': - command = 'ROOTPFMFS' - # Position in the list returned equals position - # in the document - label = 
messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'FAMILYSIZES': - command = 'FAMILYSIZES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='family_size',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'TOKFREQS': - command = 'TOKFREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'LEMMAFREQS': - command = 'LEMMAfREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='lemma_freq'))) - elif messagelist[0] == 'ROOTFREQS': - command = 'ROOTFREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_Freq'))) - elif messagelist[0] == 'MAXFREQS': - command = 'MAXFREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='max_freq'))) - elif messagelist[0] == 'CONCRETES': - command = 'CONCRETES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='concreteness'))) - elif messagelist[0] == 'ABSTRACTTRAITS': - command = 'ABSTRACTTRAITS' - # Position in the list returned 
equals position - # in the document. Flag 1 if the word names an - # abstract trait, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='abstract_trait'))) - elif messagelist[0] == 'ANIMATES': - command = 'ANIMATES' - # Position in the list returned equals position - # in the document. Flag 1 if the word names an animate - # entity, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='animate'))) - elif messagelist[0] == 'LOCATIONS': - command = 'LOCATIONS' - # Position in the list returned equals position - # in the document. Flag 1 if the word names an - # animate entity, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='location'))) - elif messagelist[0] == 'DEICTICS': - command = 'DEICTICS' - # Position in the list returned equals position in - # the document. 
Flag 1 if the word names a deictic - # element, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='deictic'))) - elif messagelist[0] == 'PARAGRAPHS': - command = 'PARAGRAPHS' - # Items in the list indicate word offsets in the document - # at which paragraph breaks appear - label = messagelist[1] - doc = self.parser.get_document(label) - - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='delimiter_n') - )) - # doc._.paragraph_breaks)) - elif messagelist[0] == 'SENTENCES': - command = 'SENTENCES' - # Items in the list indicate word offsets in the document - # at which paragraph breaks appear - label = messagelist[1] - doc = self.parser.get_document(label) - - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='sents') - )) - #await websocket.send(json.dumps( - # [(sent.start, sent.end) for sent in doc.sents])) - elif messagelist[0] == 'PARAGRAPHLENS': - command = 'PARAGRAPHLENS' - # Items in the list indicate lengths of paragraphs listed - # by offset in GETPARAGRAPHS - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen']) - )) - elif messagelist[0] == 'TRANSITIONPROFILE': - command = 'TRANSITIONPROFILE' - # Returns a rich data structure in a list containing - # (1) total number of transition words in the document - # (2) a dictionary that lists the frequency of a predefined - # set of transition word categories. - # (3) a dictionary that lists the frequency of individual - # transition words - # (4) a list of lists that provides for each transition - # word the word string, its start and stop offsets, - # and its transition word category. 
- label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.transition_word_profile)) - elif messagelist[0] == 'TRANSITIONS': - command = 'TRANSITIONS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='transitions') - )) - elif messagelist[0] == 'TRANSITIONDISTANCES': - command = 'TRANSITIONDISTANCES' - # List of cosine distances between ten-word windows - # before and after a transition - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='transition_distances') - )) - elif messagelist[0] == 'SENTENCECOHESIONS': - command = 'SENTENCECOHESIONS' - # List of cosine distances between ten-word windows - # before and after a sentence boundary - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='intersentence_cohesions') - )) - elif messagelist[0] == 'SLIDERCOHESIONS': - command = 'SLIDERCOHESIONS' - # List of cosine distances between ten-word windows - # before and after a sliding window through the text - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='sliding_window_cohesions') - )) - elif messagelist[0] == 'COREFCHAINS': - command = 'COREFCHAINS' - # List of coreference chains found in document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.coref_chains)) - elif messagelist[0] == 'RHEMEDEPTHS': - command = 'RHEMEDEPTHS' - # Syntactic depth of the sentence rheme -- part of - # sentence after the main verb where new information - # is usually placed - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - 
doc._.AWE_Info(infoType='Doc',indicator='syntacticDepthsOfRhemes') - )) - elif messagelist[0] == 'THEMEDEPTHS': - command = 'THEMEDEPTHS' - # Syntactic depth of the sentence theme -- part - # of sentence before the main verb where given - # information is usually placed - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='syntacticDepthsOfThemes') - )) - elif messagelist[0] == 'WEIGHTEDDEPTHS': - command = 'WEIGHTEDDEPTHS' - # Syntactic depth weighted to penalize - # left-embedded structures - # that tend to be harder to process - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='weightedSyntacticDepths') - )) - elif messagelist[0] == 'WEIGHTEDBREADTHS': - command = 'WEIGHTEDBREADTHS' - # Syntactic breadth -- measure of extent to which sentence - # structure is additive, consisting of coordinated - # structures and loosely appended modifiers typical of - # spoken, often unplanned sentence production - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='weightedSyntacticBreadths') - )) - elif messagelist[0] == 'SENTENCETYPES': - # tuple giving number and location of sentence types - # format: - # (1,1,1,1,[1,2,3,4]) would be the record for a text that - # had four sentences -- simple sentence, compound sentence, - # complex sentence, and compound/complex sentence, in - # that order. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='sentence_types') - )) - elif messagelist[0] == 'SYNTACTICPROFILE': - command = 'SYNTACTICPROFILE' - # Returns a dictionary containing frequency information - # about the syntactic relations and categories in the text. 
- # This includes information about the frequency of parts - # of speech, morphological categories, and syntactic - # dependencies between specific parts of speech. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.syntacticProfile)) - elif messagelist[0] == 'NORMEDSYNTACTICPROFILE': - command = 'NORMEDSYNTACTICPROFILE' - # Returns a dictionary containing normalized - # frequency information (proportionas) for the - # syntactic relations and categories in the text. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.syntacticProfileNormed)) - elif messagelist[0] == 'QUOTEDTEXT': - command = 'QUOTEDTEXT' - # 1 for tokens within quotation marks, 0 for other text - # Position in the list corresponds to offset of token - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_quoted') - )) - elif messagelist[0] == 'DIRECTSPEECHSPANS': - command = 'DIRECTSPEECHSPANS' - # Data about subset of quoted text -- specifically, - # quoted text that is attributed to a specific - # speaker. - # - # Returns a list of lists with three top level - # elements: - # - # 1. Speaker: a list of offsets to tokens - # referring to the speaker(s) - # 2. Addressee: a list of offsets to tokens - # referring to the person(s) spoken to. - # 3. Span start offset - # 4. Span end offset. - # - # Note that first and second person pronouns - # inside direct speech may reference a person - # explicitly identified in the direct speech - # framing text. Coreferee reference resolution - # may apply, so that the speaker and addressee - # references may be to a proper noun at the head - # of a pronominal reference chain that includes - # the direct speech frame. 
- label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech') - )) - elif messagelist[0] == 'IN_DIRECT_SPEECH': - # 1 for tokens within quoted stretches of direct speech, - # 0 for other text. Position in the list corresponds to - # offset of token in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_in_direct_speech') - )) - elif messagelist[0] == 'TENSECHANGES': - # list of positions where tense changed in the main - # document flow (not in direct speech/quotations, - # with flag to indicate whether shift was to past - # tense or to present tense. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.vwp_tense_changes)) - elif messagelist[0] == 'PERSPECTIVES': - # list of positions where perspective is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_perspective') - )) - elif messagelist[0] == 'ATTRIBUTIONS': - # list of positions where attribution is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_attribution') - )) - elif messagelist[0] == 'SOURCES': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_source') - )) - elif messagelist[0] == 'CITES': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_cite') - )) - elif messagelist[0] == 'STATEMENTSOFFACT': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await 
websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_fact') - )) - elif messagelist[0] == 'STATEMENTSOFOPINION': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_opinion') - )) - elif messagelist[0] == 'PERSPECTIVESPANS': - command = 'PERSPECTIVESPANS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.vwp_perspective_spans)) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='vwp_perspective_spans') - ) - elif messagelist[0] == 'STANCEMARKERS': - command = 'STANCEMARKERS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.vwp_stance_markers)) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='vwp_stance_markers') - ) - - elif messagelist[0] == 'CLAIMTEXTS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_claim') - )) - - elif messagelist[0] == 'DISCUSSIONTEXTS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_discussion') - )) - - elif messagelist[0] == 'EMOTIONWORDS': - command = 'EMOTIONWORDS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_emotionword') - )) - - elif messagelist[0] == 'CHARACTERWORDS': - command = 'CHARACTERWORDS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_character_traits') - )) - - elif messagelist[0] == 'EMOTIONALSTATES': - command = 'EMOTIONALSTATES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - 
doc._.AWE_Info(infoType="Doc",indicator='vwp_emotion_states') - ) - elif messagelist[0] == 'CHARACTERTRAITS': - command = 'CHARACTERTRAITS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.vwp_character_traits)) - elif messagelist[0] == 'PROPOSITIONALATTITUDES': - command = 'PROPOSITIONALATTITUDES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_propositional_attitudes') - )) - elif messagelist[0] == 'SOCIAL_AWARENESS': - command = 'SOCIAL_AWARENESS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_social_awareness') - )) - elif messagelist[0] == 'CONCRETEDETAILS': - command = 'CONCRETEDETAILS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(indicator='concrete_detail') - ) - elif messagelist[0] == 'INTERACTIVELANGUAGE': - command = 'INTERACTIVELANGUAGE' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_interactive') - )) - elif messagelist[0] == 'ARGUMENTWORDS': - command = 'ARGUMENTWORDS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_argumentword') - )) - elif messagelist[0] == 'ARGUMENTLANGUAGE': - command = 'ARGUMENTLANGUAGE' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_argumentation') - )) - elif messagelist[0] == 'EXPLICITARGUMENTWORDS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_explicit_argument') - )) - elif messagelist[0] == 'SUBJECTIVITYRATINGS': - command = 'SUBJECTIVITYRATINGS' - label = messagelist[1] - 
doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='subjectivity') - )) - elif messagelist[0] == 'SENTIMENTRATINGS': - command = 'SENTIMENTRATINGS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_sentiment') - )) - elif messagelist[0] == 'TONERATINGS': - command = 'TONERATINGS2' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_tone') - )) - elif messagelist[0] == 'POLARITYRATINGS': - command = 'POLARITYRATINGS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='polarity') - )) - elif messagelist[0] == 'ASSESSMENTS': - command = 'ASSESSMENTS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.assessments)) - elif messagelist[0] == 'PASTTENSESCOPE': - command = 'PASTTENSESCOPE' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='in_past_tense_scope') - )) - elif messagelist[0] == 'GOVERNINGSUBJECTS': - command = 'GOVERNINGSUBJECTS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='governing_subject') - )) - elif messagelist[0] == 'CLUSTERS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='clusterID') - )) - elif messagelist[0] == 'PROMPTLANGUAGE': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.prompt_language)) - elif messagelist[0] == 'PROMPTRELATED': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.prompt_related)) - elif messagelist[0] == 'MAINIDEAS': - label = messagelist[1] - doc 
= self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='main_ideas') - ) - elif messagelist[0] == 'SUPPORTINGIDEAS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='supporting_ideas') - ) - elif messagelist[0] == 'SUPPORTINGDETAILS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='supporting_details') - ) - elif messagelist[0] == 'CLUSTERINFO': - command = 'CLUSTERINFO' - # Get the local word clusters our algorithm has - # clustered the words of the student document into - # - # The data is a list of records in this format: - # 1. The clusterID. - # 2. The cluster rating, which is roughly a measure - # of how important the cluster seems to be in the - # docyument as measured by the number of words in it - # and their relative infrequency - # 3. A list of the actual word strings in each cluster - # 4. 
The offsets of the words assigned to each cluster - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.clusterInfo)) - elif messagelist[0] == 'DEVWORDS': - command = 'DEVWORDS' - # offset of the logical subject that governs - # the domain this token belongs to - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='devword') - )) - elif messagelist[0] == 'NOMINALREFERENCES': - command = 'NOMINALREFERENCES' - # offset of the logical subject that governs - # the domain this token belongs to - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.nominalReferences)) - elif messagelist[0] == 'DOCSUMMARYLABELS': - command = 'DOCSUMMARYLABELS' - await websocket.send(json.dumps(self.summaryLabels)) - elif messagelist[0] == 'DOCSUMMARYFEATS': - command = 'DOCSUMMARYFEATS' - label = messagelist[1] - doc = self.parser.get_document(label) - summaryFeats = [ - doc._.AWE_Info(indicator='nSyll',summaryType="mean"), - doc._.AWE_Info(indicator='nSyll',summaryType="median"), - doc._.AWE_Info(indicator='nSyll',summaryType="max"), - doc._.AWE_Info(indicator='nSyll',summaryType="min"), - doc._.AWE_Info(indicator='nSyll',summaryType="stdev"), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='mean'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='median'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='max'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='min'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='stdev'), - doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', 
['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='mean'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='median'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='max'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='min'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='stdev'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='max'), - 
doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='mean'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='median'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='max'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='min'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='stdev'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='max'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='max'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', 
['True'])],summaryType='max'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='max'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='max_freq',summaryType='mean'), - doc._.AWE_Info(indicator='max_freq',summaryType='median'), - doc._.AWE_Info(indicator='max_freq',summaryType='max'), - doc._.AWE_Info(indicator='max_freq',summaryType='min'), - doc._.AWE_Info(indicator='max_freq',summaryType='stdev'), - doc._.AWE_Info(indicator='abstract_trait',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='animate',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='deictic',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='root', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(indicator='lemma_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(indicator='lower_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(infoType="Doc",indicator='delimiter_n',summaryType='total'), - 
doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='proportion'), - doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='total'), - doc._.AWE_Info(infoType="Doc",indicator='transitions',transformations=['text'],summaryType='counts'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='max'), - 
doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',summaryType='counts'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='sents',summaryType='counts'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='mean'), - 
doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='stdev'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='mean'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='median'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='max'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='min'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='stdev'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='mean'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='median'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='max'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='min'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='stdev'), - doc._.syntacticVariety, - doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_argumentation',summaryType='proportion'), - doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech',summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_egocentric',summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_allocentric',summaryType='proportion'), - 
doc._.AWE_Info(indicator='subjectivity',summaryType='mean'), - doc._.AWE_Info(indicator='subjectivity',summaryType='median'), - doc._.AWE_Info(indicator='subjectivity',summaryType='min'), - doc._.AWE_Info(indicator='subjectivity',summaryType='max'), - doc._.AWE_Info(indicator='subjectivity',summaryType='stdev'), - doc._.AWE_Info(indicator='polarity',summaryType='mean'), - doc._.AWE_Info(indicator='polarity',summaryType='median'), - doc._.AWE_Info(indicator='polarity',summaryType='min'), - doc._.AWE_Info(indicator='polarity',summaryType='max'), - doc._.AWE_Info(indicator='polarity',summaryType='stdev'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='mean'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='median'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='min'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='max'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='stdev'), - doc._.AWE_Info(indicator='devword', summaryType='proportion'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - 
doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - 
doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev') - ] - await websocket.send(json.dumps(summaryFeats)) - else: - await websocket.send(False) - -# --- [ MAIN ] -------------------------------------------------------------------------- - -if __name__ == '__main__': - print('parser server loading') - wsc = parserServer() - -# --- [ END ] --------------------------------------------------------------------------- +#!/usr/bin/env python3.10 +# Copyright 2022, Educational Testing Service + +import asyncio +import base64 +import websockets +import json +import awe_workbench + +# commented out cause we dont use them anymore +# import holmes_extractor +# import holmes_extractor.manager +# import holmes_extractor.ontology +# from holmes_extractor.manager import Manager +# from holmes_extractor.ontology import Ontology +from awe_components.components.utility_functions import content_pos + +class parserServer: + + # Initialize + parser = None + documents = {} #dict to store parsed documents + + def __init__(self, pipeline_def=[]): + + # Set up the NLP pipeline + print("initializing NLP pipeline...") + try: + self.nlp = spacy.load(SPACY_MODEL) + for comp in COMPONENTS: + self.nlp.add_pipe(comp) + except OSError as e: + print("There was an error loading 'en_core_web_lg' from spacy.") + raise OSError() from e + + # Start the event loop, and run until the kill command + print("starting event loop -- use [KILL] command to terminate.") + asyncio.get_event_loop().run_until_complete( + websockets.serve(self.run_parser, HOST, PORT, max_size=MAX_DATA_LIMIT)) + 
print('parser server running...') + asyncio.get_event_loop().run_forever() + print('parser server terminated...') + + async def kill(self, websocket): + """ + Command called to kill the parser server. + """ + self.parser.close() + await websocket.send(json.dumps(True)) + await websocket.close() + exit() + + summaryLabels = [ + 'mean_nSyll', + 'med_nSyll', + 'max_nSyll', + 'min_nSyll', + 'std_nSyll', + 'mean_sqnChars', + 'med_sqnChars', + 'max_sqnChars', + 'min_sqnChars', + 'std_sqnChars', + 'propn_latinate', + 'propn_academic', + 'mean_family_size', + 'med_family_size', + 'max_family_size', + 'min_family_size', + 'std_family_size', + 'mean_concreteness', + 'med_concreteness', + 'max_concreteness', + 'min_concreteness', + 'std_concreteness', + 'mean_logNSenses', + 'med_logNSenses', + 'max_logNSenses', + 'min_logNSenses', + 'std_logNSenses', + 'mean_nMorph', + 'med_nMorph', + 'max_nMorph', + 'min_nMorph', + 'std_nMorph', + 'mean_logfreq_HAL', + 'med_logfreq_HAL', + 'max_logfreq_HAL', + 'min_logfreq_HAL', + 'std_logfreq_HAL', + 'mean_root_fam_size', + 'med_root_fam_size', + 'max_root_fam_size', + 'min_root_fam_size', + 'std_root_fam_size', + 'mean_root_pfmf', + 'med_root_pfmf', + 'max_root_pfmf', + 'min_root_pfmf', + 'std_root_pfmf', + 'mean_token_frequency', + 'median_token_frequency', + 'max_token_frequency', + 'min_token_frequency', + 'std_token_frequency', + 'mean_lemma_frequency', + 'median_lemma_frequency', + 'max_lemma_frequency', + 'min_lemma_frequency', + 'std_lemma_frequency', + 'mean_max_frequency', + 'median_max_frequency', + 'max_max_frequency', + 'min_max_frequency', + 'std_max_frequency', + 'propn_abstract_traits', + 'propn_animates', + 'propn_deictics', + 'wf_type_count', + 'lemma_type_count', + 'type_count', + 'token_count', + 'paragraph_count', + 'mean_paragraph_length', + 'median_paragraph_length', + 'max_paragraph_length', + 'min_paragraph_length', + 'stdev_paragraph_length', + 'propn_transition_words', + 'transition_category_count', + 
'transition_word_type_count', + 'mean_transition_distance', + 'median_transition_distance', + 'max_transition_distance', + 'min_transition_distance', + 'stdev_transition_distance', + 'mean_sent_cohesion', + 'median_sent_cohesion', + 'max_sent_cohesion', + 'min_sent_cohesion', + 'stdev_sent_cohesion', + 'mean_slider_cohesion', + 'median_slider_cohesion', + 'max_slider_cohesion', + 'min_slider_cohesion', + 'stdev_slider_cohesion', + 'num_corefs', + 'mean_coref_chain_len', + 'median_coref_chain_len', + 'max_coref_chain_len', + 'min_coref_chain_len', + 'stdev_coref_chain_len', + 'sentence_count', + 'mean_sentence_len', + 'median_sentence_len', + 'max_sentence_len', + 'min_sentence_len', + 'std_sentence_len', + 'mean_words_to_sentence_root', + 'median_words_to_sentence_root', + 'max_words_to_sentence_root', + 'min_words_to_sentence_root', + 'stdev_words_to_sentence_root', + 'meanRhemeDepth', + 'medianRhemeDepth', + 'maxRhemeDepth', + 'minRhemeDepth', + 'stdevRhemeDepth', + 'meanThemeDepth', + 'medianThemeDepth', + 'maxThemeDepth', + 'minThemeDepth', + 'stdevThemeDepth', + 'meanWeightedDepth', + 'medianWeightedDepth', + 'maxWeightedDepth', + 'minWeightedDepth', + 'stdevWeightedDepth', + 'meanWeightedBreadth', + 'medianWeightedBreadth', + 'maxWeightedBreadth', + 'minWeightedBreadth', + 'stdevWeightedBreadth', + 'syntacticVariety', + 'propn_past', + 'propn_argument_words', + 'propn_direct_speech', + 'propn_egocentric', + 'propn_allocentric', + 'mean_subjectivity', + 'median_subjectivity', + 'min_subjectivity', + 'max_subjectivity', + 'stdev_subjectivity', + 'mean_polarity', + 'median_polarity', + 'min_polarity', + 'max_polarity', + 'stdev_polarity', + 'mean_sentiment', + 'median_sentiment', + 'min_sentiment', + 'max_sentiment', + 'stdev_sentiment', + 'mean_main_cluster_span', + 'median_main_cluster_span', + 'min_main_cluster_span', + 'max_main_cluster_span', + 'stdev_main_cluster_span', + 'propn_devwords', + 'mean_devword_nsyll', + 'median_devword_nsyll', + 
'min_devword_nsyll', + 'max_devword_nsyll', + 'stdev_devword_nsyll', + 'mean_devword_nmorph', + 'median_devword_nmorph', + 'min_devword_nmorph', + 'max_devword_nmorph', + 'stdev_devword_nmorph', + 'mean_devword_nsenses', + 'median_devword_nsenses', + 'min_devword_nsenses', + 'max_devword_nsenses', + 'stdev_devword_nsenses', + 'mean_devword_token_freq', + 'median_devword_token_freq', + 'min_devword_token_freq', + 'max_devword_token_freq', + 'stdev_devword_token_freq', + 'mean_devword_concreteness', + 'median_devword_concreteness', + 'min_devword_concreteness', + 'max_devword_concreteness', + 'stdev_devword_concreteness' + ] + + async def run_parser(self, websocket, path): + current_doc = '' + async for message in websocket: + + messagelist = json.loads(message) + print(messagelist) + command = messagelist[0] # helps reduce lines a bit + if command == 'KILL': + await websocket.send(json.dumps(True)) + await self.kill(websocket) + elif command == 'CLEARPARSED': + self.parser.remove_all_documents() + await websocket.send(json.dumps(True)) + elif command == 'REMOVE': + label = messagelist[1] + self.parser.remove_document(label) + await websocket.send(json.dumps(True)) + elif command == 'PARSEONE': + label = messagelist[1] + text = current_doc + messagelist[2] + current_doc = '' + if label in self.parser.list_document_labels(): + self.parser.remove_document(label) + self.parser.parse_and_register_document(text, label) + doc = self.parser.get_document(label) + await websocket.send(json.dumps(True)) + elif command == 'PARTIALTEXT': + # possibly need to set command = ' ' + current_document += messagelist[2] + elif command == 'PARSESET': + results = [] + [labels, texts] = messagelist[1] + for i, text in enumerate(texts): + text = texts[i] + print('parsed document', str(i+1), 'of', len(texts)) + if text is not None and len(text) > 0: + if labels[i] in self.parser.list_document_labels(): + self.parser.remove_document(labels[i]) + self.parser.parse_and_register_document( + 
text, labels[i]) + await websocket.send(json.dumps(True)) + elif command == 'LABELS': + labels = self.parser.list_document_labels() + await websocket.send(json.dumps(labels)) + elif command == 'SERIALIZED': + label = messagelist[1] + serialized = base64.b64encode( + self.parser.serialize_document(label)) + await websocket.send(serialized) + elif command == 'NEWSEARCHPHRASE': + search_phrase_text = messagelist[1] + label = messagelist[2] + ok = self.parser.register_search_phrase(search_phrase_text) + await websocket.send(ok) + elif command == 'REMOVELABELEDSEARCH': + label = messagelist[1] + self.parser.remove_all_search_phrases_with_label(label) + await websocket.send(json.dumps(True)) + elif command == 'CLEARSEARCHES': + self.parser.remove_all_search_phrases() + await websocket.send(json.dumps(True)) + elif command == 'SHOWSEARCHLABELS': + labels = self.parser.list_search_phrase_labels() + await websocket.send(json.dumps(labels)) + elif command == 'MATCH_DOCUMENTS': + matches = self.parser.match() + await websocket.send(json.dumps(matches)) + elif command == 'FREQUENCIES': + freqinfo = self.parser.get_corpus_frequency_information() + await websocket.send(json.dumps(freqinfo)) + elif command == 'TOPIC_MATCHES': + text_to_match = messagelist[1] + # This search takes a long list of keyword parameters, + # all of them with preset default thresholds. TBD: + # expose all of these parameters in more complex topic + # match functionality. Holmes extractor documentation + # describes what each of these parameters involves. 
+ matches = self.parser.topic_match_documents_against( + text_to_match, + word_embedding_match_threshold=.42, + relation_score=20, + reverse_only_relation_score=15, + single_word_score=10, + single_word_any_tag_score=5, + different_match_cutoff_score=10, + relation_matching_frequency_threshold=0.0, + embedding_matching_frequency_threshold=0.0, + use_frequency_factor=True) + await websocket.send(json.dumps(matches)) + # Holmes Extractor also has supervised topic model + # building facilities using the functions + # get_supervised_topic_training_basis(), + # and deserialize_supervised_topic_classifier(). + # TBD: Add support for Holmes supervised topic model + # building. + elif command == 'AWE_INFO': + label = messagelist[1] + doc = self.parser.get_document(label) + indic = None + itype = None + summ = None + filt = None + if len(messagelist) == 3: + indic = messagelist[2] + await websocket.send( + doc._.AWE_Info(indicator=indic)) + elif len(messagelist) == 4: + indic = messagelist[2] + itype = messagelist[3] + await websocket.send( + doc._.AWE_Info(indicator=indic,infoType=itype)) + elif len(messagelist) == 5: + indic = messagelist[2] + itype = messagelist[3] + summ = messagelist[4] + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ) + if type(result) in [int, float, bool]: + await websocket.send(str(result)) + else: + await websocket.send(result) + + elif len(messagelist) == 6: + indic = messagelist[2] + itype = messagelist[3] + summ = messagelist[4] + filt = json.loads(messagelist[5]) + result = \ + doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt) + if type(result) in [int, float]: + await websocket.send(str(result)) + else: + await websocket.send(result) + elif len(messagelist) == 7: + indic = messagelist[2] + itype = messagelist[3] + summ = messagelist[4] + filt = json.loads(messagelist[5]) + trans = json.loads(messagelist[6]) + result = \ + 
doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt,transformations=trans) + if type(result) in [int, float]: + await websocket.send(str(result)) + else: + await websocket.send(result) + + else: + await websocket.send(json.dumps([])) + elif command == 'DOCTOKENS': + label = messagelist[1] + doc = self.parser.get_document(label) + if doc is not None: + await websocket.send( + doc._.AWE_Info(indicator='text')) + else: + await websocket.send(json.dumps([])) + elif command == 'DOCTOKENS_WITH_WS': + label = messagelist[1] + doc = self.parser.get_document(label) + if doc is not None: + await websocket.send( + doc._.AWE_Info(indicator='text_with_ws')) + else: + await websocket.send(json.dumps([])) + elif command == 'DOCHEADS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + heads = [token.head.i for token in doc] + await websocket.send(json.dumps(heads)) + elif command == 'POS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + heads = [token.pos_ for token in doc] + await websocket.send(json.dumps(heads)) + elif command == 'DOCDEPENDENCIES': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + deps = [token.dep_ for token in doc] + await websocket.send(json.dumps(deps)) + elif command == 'DOCENTITIES': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + ents = [[ent.text, + ent.start_char, + ent.end_char, + ent.label_] for ent in doc.ents] + await websocket.send(json.dumps(ents)) + elif command == 'TOKVECS': + # List returned contains lists pairing token + # offset with token vectors cast as strings + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + 
doc._.token_vectors)) + elif command == 'LEMMAS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(indicator='lemma_') + ) + elif command == 'STOPWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(indicator='is_stop') + ) + elif command == 'WORDTYPES': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='lower_',filters=[('is_alpha', ['True']),('is_stop', ['False'])],summaryType = 'uniq') + )) + elif command == 'ROOTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root') + )) + elif command == 'SYLLABLES': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nSyll'))) + elif command == 'WORDLENGTH': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt']) + )) + elif command == 'LATINATES': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])]) + )) + elif command == 'ACADEMICS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])]) + )) + elif command == 'SENSENUMS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])]) + )) + elif command == 'LOGSENSENUMS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( 
+ doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])],transformations=['log']) + )) + elif command == 'MORPHOLOGY': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='morphology') + )) + elif command == 'MORPHNUMS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='nMorph',filters=[('is_alpha', ['True'])]) + )) + elif command == 'HALROOTFREQS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])]) + )) + elif command == 'HALLOGROOTFREQS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log']) + )) + elif command == 'ROOTFAMSIZES': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])]) + )) + elif command == 'ROOTPFMFS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])]) + )) + elif command == 'FAMILYSIZES': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='family_size',filters=[('is_alpha', ['True'])]) + )) + elif command == 'TOKFREQS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + 
doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])]) + )) + elif command == 'LEMMAFREQS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='lemma_freq'))) + elif command == 'ROOTFREQS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='root_Freq'))) + elif command == 'MAXFREQS': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='max_freq'))) + elif command == 'CONCRETES': + # Position in the list returned equals position + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='concreteness'))) + elif command == 'ABSTRACTTRAITS': + # Position in the list returned equals position + # in the document. Flag 1 if the word names an + # abstract trait, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='abstract_trait'))) + elif command == 'ANIMATES': + # Position in the list returned equals position + # in the document. Flag 1 if the word names an animate + # entity, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='animate'))) + elif command == 'LOCATIONS': + # Position in the list returned equals position + # in the document. 
Flag 1 if the word names an + # animate entity, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='location'))) + elif command == 'DEICTICS': + # Position in the list returned equals position in + # the document. Flag 1 if the word names a deictic + # element, 0 otherwise + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='deictic'))) + elif command == 'PARAGRAPHS': + # Items in the list indicate word offsets in the document + # at which paragraph breaks appear + label = messagelist[1] + doc = self.parser.get_document(label) + + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='delimiter_n') + )) + # doc._.paragraph_breaks)) + elif command == 'SENTENCES': + # Items in the list indicate word offsets in the document + # at which paragraph breaks appear + label = messagelist[1] + doc = self.parser.get_document(label) + + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='sents') + )) + #await websocket.send(json.dumps( + # [(sent.start, sent.end) for sent in doc.sents])) + elif command == 'PARAGRAPHLENS': + # Items in the list indicate lengths of paragraphs listed + # by offset in GETPARAGRAPHS + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen']) + )) + elif command == 'TRANSITIONPROFILE': + # Returns a rich data structure in a list containing + # (1) total number of transition words in the document + # (2) a dictionary that lists the frequency of a predefined + # set of transition word categories. + # (3) a dictionary that lists the frequency of individual + # transition words + # (4) a list of lists that provides for each transition + # word the word string, its start and stop offsets, + # and its transition word category. 
+ label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.transition_word_profile)) + elif command == 'TRANSITIONS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='transitions') + )) + elif command == 'TRANSITIONDISTANCES': + # List of cosine distances between ten-word windows + # before and after a transition + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='transition_distances') + )) + elif command == 'SENTENCECOHESIONS': + # List of cosine distances between ten-word windows + # before and after a sentence boundary + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='intersentence_cohesions') + )) + elif command == 'SLIDERCOHESIONS': + # List of cosine distances between ten-word windows + # before and after a sliding window through the text + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='sliding_window_cohesions') + )) + elif command == 'COREFCHAINS': + # List of coreference chains found in document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.coref_chains)) + elif command == 'RHEMEDEPTHS': + # Syntactic depth of the sentence rheme -- part of + # sentence after the main verb where new information + # is usually placed + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='syntacticDepthsOfRhemes') + )) + elif command == 'THEMEDEPTHS': + # Syntactic depth of the sentence theme -- part + # of sentence before the main verb where given + # information is usually placed + label = messagelist[1] + doc = 
self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='syntacticDepthsOfThemes') + )) + elif command == 'WEIGHTEDDEPTHS': + # Syntactic depth weighted to penalize + # left-embedded structures + # that tend to be harder to process + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='weightedSyntacticDepths') + )) + elif command == 'WEIGHTEDBREADTHS': + # Syntactic breadth -- measure of extent to which sentence + # structure is additive, consisting of coordinated + # structures and loosely appended modifiers typical of + # spoken, often unplanned sentence production + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='weightedSyntacticBreadths') + )) + elif command == 'SENTENCETYPES': + # tuple giving number and location of sentence types + # format: + # (1,1,1,1,[1,2,3,4]) would be the record for a text that + # had four sentences -- simple sentence, compound sentence, + # complex sentence, and compound/complex sentence, in + # that order. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType='Doc',indicator='sentence_types') + )) + elif command == 'SYNTACTICPROFILE': + # Returns a dictionary containing frequency information + # about the syntactic relations and categories in the text. + # This includes information about the frequency of parts + # of speech, morphological categories, and syntactic + # dependencies between specific parts of speech. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.syntacticProfile)) + elif command == 'NORMEDSYNTACTICPROFILE': + # Returns a dictionary containing normalized + # frequency information (proportionas) for the + # syntactic relations and categories in the text. 
+ label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.syntacticProfileNormed)) + elif command == 'QUOTEDTEXT': + # 1 for tokens within quotation marks, 0 for other text + # Position in the list corresponds to offset of token + # in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_quoted') + )) + elif command == 'DIRECTSPEECHSPANS': + # Data about subset of quoted text -- specifically, + # quoted text that is attributed to a specific + # speaker. + # + # Returns a list of lists with three top level + # elements: + # + # 1. Speaker: a list of offsets to tokens + # referring to the speaker(s) + # 2. Addressee: a list of offsets to tokens + # referring to the person(s) spoken to. + # 3. Span start offset + # 4. Span end offset. + # + # Note that first and second person pronouns + # inside direct speech may reference a person + # explicitly identified in the direct speech + # framing text. Coreferee reference resolution + # may apply, so that the speaker and addressee + # references may be to a proper noun at the head + # of a pronominal reference chain that includes + # the direct speech frame. + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech') + )) + elif command == 'IN_DIRECT_SPEECH': + # 1 for tokens within quoted stretches of direct speech, + # 0 for other text. Position in the list corresponds to + # offset of token in the document + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_in_direct_speech') + )) + elif command == 'TENSECHANGES': + # list of positions where tense changed in the main + # document flow (not in direct speech/quotations, + # with flag to indicate whether shift was to past + # tense or to present tense. 
+ label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.vwp_tense_changes)) + elif command == 'PERSPECTIVES': + # list of positions where perspective is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_perspective') + )) + elif command == 'ATTRIBUTIONS': + # list of positions where attribution is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_attribution') + )) + elif command == 'SOURCES': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_source') + )) + elif command == 'CITES': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_cite') + )) + elif command == 'STATEMENTSOFFACT': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_fact') + )) + elif command == 'STATEMENTSOFOPINION': + # list of positions where source is indicated + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_opinion') + )) + elif command == 'PERSPECTIVESPANS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.vwp_perspective_spans)) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='vwp_perspective_spans') + ) + elif command == 'STANCEMARKERS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.vwp_stance_markers)) + 
await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='vwp_stance_markers') + ) + + elif command == 'CLAIMTEXTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_claim') + )) + + elif command == 'DISCUSSIONTEXTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_discussion') + )) + + elif command == 'EMOTIONWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_emotionword') + )) + + elif command == 'CHARACTERWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_character_traits') + )) + + elif command == 'EMOTIONALSTATES': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='vwp_emotion_states') + ) + elif command == 'CHARACTERTRAITS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.vwp_character_traits)) + elif command == 'PROPOSITIONALATTITUDES': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_propositional_attitudes') + )) + elif command == 'SOCIAL_AWARENESS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(infoType="Doc",indicator='vwp_social_awareness') + )) + elif command == 'CONCRETEDETAILS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(indicator='concrete_detail') + ) + elif command == 'INTERACTIVELANGUAGE': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_interactive') + 
)) + elif command == 'ARGUMENTWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_argumentword') + )) + elif command == 'ARGUMENTLANGUAGE': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_argumentation') + )) + elif command == 'EXPLICITARGUMENTWORDS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_explicit_argument') + )) + elif command == 'SUBJECTIVITYRATINGS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='subjectivity') + )) + elif command == 'SENTIMENTRATINGS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_sentiment') + )) + elif command == 'TONERATINGS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='vwp_tone') + )) + elif command == 'POLARITYRATINGS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='polarity') + )) + elif command == 'ASSESSMENTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.assessments)) + elif command == 'PASTTENSESCOPE': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='in_past_tense_scope') + )) + elif command == 'GOVERNINGSUBJECTS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='governing_subject') + )) + elif command == 'CLUSTERS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + 
doc._.AWE_Info(indicator='clusterID') + )) + elif command == 'PROMPTLANGUAGE': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.prompt_language)) + elif command == 'PROMPTRELATED': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.prompt_related)) + elif command == 'MAINIDEAS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='main_ideas') + ) + elif command == 'SUPPORTINGIDEAS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='supporting_ideas') + ) + elif command == 'SUPPORTINGDETAILS': + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send( + doc._.AWE_Info(infoType="Doc",indicator='supporting_details') + ) + elif command == 'CLUSTERINFO': + # Get the local word clusters our algorithm has + # clustered the words of the student document into + # + # The data is a list of records in this format: + # 1. The clusterID. + # 2. The cluster rating, which is roughly a measure + # of how important the cluster seems to be in the + # docyument as measured by the number of words in it + # and their relative infrequency + # 3. A list of the actual word strings in each cluster + # 4. 
The offsets of the words assigned to each cluster + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.clusterInfo)) + elif command == 'DEVWORDS': + # offset of the logical subject that governs + # the domain this token belongs to + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps( + doc._.AWE_Info(indicator='devword') + )) + elif command == 'NOMINALREFERENCES': + # offset of the logical subject that governs + # the domain this token belongs to + label = messagelist[1] + doc = self.parser.get_document(label) + await websocket.send(json.dumps(doc._.nominalReferences)) + elif command == 'DOCSUMMARYLABELS': + await websocket.send(json.dumps(self.summaryLabels)) + elif command == 'DOCSUMMARYFEATS': + label = messagelist[1] + doc = self.parser.get_document(label) + summaryFeats = [ + doc._.AWE_Info(indicator='nSyll',summaryType="mean"), + doc._.AWE_Info(indicator='nSyll',summaryType="median"), + doc._.AWE_Info(indicator='nSyll',summaryType="max"), + doc._.AWE_Info(indicator='nSyll',summaryType="min"), + doc._.AWE_Info(indicator='nSyll',summaryType="stdev"), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='mean'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='median'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='max'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='min'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='stdev'), + doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])], summaryType="proportion"), + 
doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='mean'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='median'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='max'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='min'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='stdev'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', 
['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='mean'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='median'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='max'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='min'), + doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='stdev'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='min'), + 
doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='median'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='max'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='min'), + doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), + doc._.AWE_Info(indicator='max_freq',summaryType='mean'), + doc._.AWE_Info(indicator='max_freq',summaryType='median'), + doc._.AWE_Info(indicator='max_freq',summaryType='max'), + doc._.AWE_Info(indicator='max_freq',summaryType='min'), + doc._.AWE_Info(indicator='max_freq',summaryType='stdev'), + doc._.AWE_Info(indicator='abstract_trait',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='animate',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='deictic',filters=[('is_alpha', ['True'])], summaryType="proportion"), + doc._.AWE_Info(indicator='root', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='lemma_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='lower_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), + doc._.AWE_Info(infoType="Doc",indicator='delimiter_n',summaryType='total'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='median'), + 
doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='proportion'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='total'), + doc._.AWE_Info(infoType="Doc",indicator='transitions',transformations=['text'],summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',summaryType='counts'), + 
doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sents',summaryType='counts'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='min'), + 
doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='stdev'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='mean'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='median'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='max'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='min'), + doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='stdev'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='mean'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='median'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='max'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='min'), + doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='stdev'), + doc._.syntacticVariety, + doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_argumentation',summaryType='proportion'), + doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_egocentric',summaryType='proportion'), + doc._.AWE_Info(indicator='vwp_allocentric',summaryType='proportion'), + doc._.AWE_Info(indicator='subjectivity',summaryType='mean'), + doc._.AWE_Info(indicator='subjectivity',summaryType='median'), + doc._.AWE_Info(indicator='subjectivity',summaryType='min'), + doc._.AWE_Info(indicator='subjectivity',summaryType='max'), + doc._.AWE_Info(indicator='subjectivity',summaryType='stdev'), 
+ doc._.AWE_Info(indicator='polarity',summaryType='mean'), + doc._.AWE_Info(indicator='polarity',summaryType='median'), + doc._.AWE_Info(indicator='polarity',summaryType='min'), + doc._.AWE_Info(indicator='polarity',summaryType='max'), + doc._.AWE_Info(indicator='polarity',summaryType='stdev'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='mean'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='median'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='min'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='max'), + doc._.AWE_Info(indicator='vwp_sentiment',summaryType='stdev'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='mean'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='median'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='min'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='max'), + doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='stdev'), + doc._.AWE_Info(indicator='devword', summaryType='proportion'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], 
summaryType='median'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), + doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], 
summaryType='max'), + doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev') + ] + await websocket.send(json.dumps(summaryFeats)) + else: + await websocket.send(False) + +if __name__ == '__main__': + print('parser server loading') + wsc = parserServer() \ No newline at end of file diff --git a/awe_workbench/web/parserServer.py:Zone.Identifier b/awe_workbench/web/parserServer.py:Zone.Identifier new file mode 100644 index 0000000..053d112 --- /dev/null +++ b/awe_workbench/web/parserServer.py:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet From 939148ddcce76b9d552043f7299a14ee3cfd51de Mon Sep 17 00:00:00 2001 From: askhan6 Date: Mon, 7 Oct 2024 13:34:21 -0400 Subject: [PATCH 27/39] attempting removal of holmes from parserServer --- awe_workbench/web/parserServer.py:Zone.Identifier | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 awe_workbench/web/parserServer.py:Zone.Identifier diff --git a/awe_workbench/web/parserServer.py:Zone.Identifier b/awe_workbench/web/parserServer.py:Zone.Identifier deleted file mode 100644 index 053d112..0000000 --- a/awe_workbench/web/parserServer.py:Zone.Identifier +++ /dev/null @@ -1,3 +0,0 @@ -[ZoneTransfer] -ZoneId=3 -HostUrl=about:internet From 6a08f0a107357f5949beb8d0220432a78b8dabf4 Mon Sep 17 00:00:00 2001 From: arsalaan Date: Mon, 7 Oct 2024 15:36:37 -0400 Subject: [PATCH 28/39] refactored to (mostly) use a local dict for document storage --- awe_workbench/web/parserServer.py | 283 ++++++++++-------- .../web/parserServer.py:Zone.Identifier | 3 + 2 files changed, 169 insertions(+), 117 deletions(-) create mode 100644 awe_workbench/web/parserServer.py:Zone.Identifier diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index 6750638..1a1c6da 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -1,11 +1,14 @@ #!/usr/bin/env python3.10 # Copyright 2022, Educational 
Testing Service +import awe_workbench import asyncio import base64 import websockets import json -import awe_workbench +import spacy +import coreferee +import spacytextblob.spacytextblob # commented out cause we dont use them anymore # import holmes_extractor @@ -14,6 +17,27 @@ # from holmes_extractor.manager import Manager # from holmes_extractor.ontology import Ontology from awe_components.components.utility_functions import content_pos +import awe_components.components.lexicalFeatures +import awe_components.components.syntaxDiscourseFeats +import awe_components.components.viewpointFeatures +import awe_components.components.lexicalClusters +import awe_components.components.contentSegmentation +from awe_components.components.utility_functions import content_pos +from awe_workbench.pipeline import pipeline_def + + + +# --- [ CONSTS/VARS ] ------------------------------------------------------------------- + +HOST = 'localhost' +PORT = 8766 +MAX_DATA_LIMIT = 2 ** 24 +SPACY_MODEL = 'en_core_web_lg' +COMPONENTS = [el['component'] for el in pipeline_def] +AWE_INFO_KEYS = ['indicator', 'infoType', 'summaryType', 'filters', 'transformations'] + +# --- [ CLASSES ] ----------------------------------------------------------------------- + class parserServer: @@ -246,20 +270,27 @@ async def run_parser(self, websocket, path): await websocket.send(json.dumps(True)) await self.kill(websocket) elif command == 'CLEARPARSED': - self.parser.remove_all_documents() + self.documents.clear() await websocket.send(json.dumps(True)) elif command == 'REMOVE': label = messagelist[1] - self.parser.remove_document(label) + if label in self.documents: + del self.documents[label] + # self.parser.remove_document(label) await websocket.send(json.dumps(True)) elif command == 'PARSEONE': label = messagelist[1] text = current_doc + messagelist[2] current_doc = '' - if label in self.parser.list_document_labels(): - self.parser.remove_document(label) - self.parser.parse_and_register_document(text, 
label) - doc = self.parser.get_document(label) + #if label in self.parser.list_document_labels(): + # self.parser.remove_document(label) + #self.parser.parse_and_register_document(text, label) + #doc = self.parser.get_document(label) + if label in self.documents: + del self.documents[label] + self.documents[label] = text + self.documents[label] = text + doc = self.documents[label] await websocket.send(json.dumps(True)) elif command == 'PARTIALTEXT': # possibly need to set command = ' ' @@ -271,38 +302,51 @@ async def run_parser(self, websocket, path): text = texts[i] print('parsed document', str(i+1), 'of', len(texts)) if text is not None and len(text) > 0: - if labels[i] in self.parser.list_document_labels(): - self.parser.remove_document(labels[i]) - self.parser.parse_and_register_document( - text, labels[i]) + #if labels[i] in self.parser.list_document_labels(): + # self.parser.remove_document(labels[i]) + #self.parser.parse_and_register_document( + # text, labels[i]) + + if labels[i]in self.documents: + del self.documents[labels[i]] + self.documents[labels[i]] = text await websocket.send(json.dumps(True)) elif command == 'LABELS': - labels = self.parser.list_document_labels() + # labels = self.parser.list_document_labels() + labels = list(self.documents.keys()) await websocket.send(json.dumps(labels)) elif command == 'SERIALIZED': label = messagelist[1] + #serialized = base64.b64encode( + # self.parser.serialize_document(label)) serialized = base64.b64encode( - self.parser.serialize_document(label)) + self.documents[label].encode('utf-8')) await websocket.send(serialized) elif command == 'NEWSEARCHPHRASE': search_phrase_text = messagelist[1] label = messagelist[2] - ok = self.parser.register_search_phrase(search_phrase_text) + # ok = self.parser.register_search_phrase(search_phrase_text) + ok = (search_phrase_text in self.documents[label]) await websocket.send(ok) elif command == 'REMOVELABELEDSEARCH': + #TODO: where are all search phrases? 
I need to remove all of them that have a given label label = messagelist[1] self.parser.remove_all_search_phrases_with_label(label) await websocket.send(json.dumps(True)) elif command == 'CLEARSEARCHES': + #TODO: where are all search phrases? I need to clear all of them self.parser.remove_all_search_phrases() await websocket.send(json.dumps(True)) elif command == 'SHOWSEARCHLABELS': + #TODO: where are all search phrases? I need to list all of them labels = self.parser.list_search_phrase_labels() await websocket.send(json.dumps(labels)) elif command == 'MATCH_DOCUMENTS': + # TODO what does the match() function do? matches = self.parser.match() await websocket.send(json.dumps(matches)) elif command == 'FREQUENCIES': + # TODO whats this lol freqinfo = self.parser.get_corpus_frequency_information() await websocket.send(json.dumps(freqinfo)) elif command == 'TOPIC_MATCHES': @@ -312,17 +356,22 @@ async def run_parser(self, websocket, path): # expose all of these parameters in more complex topic # match functionality. Holmes extractor documentation # describes what each of these parameters involves. 
- matches = self.parser.topic_match_documents_against( - text_to_match, - word_embedding_match_threshold=.42, - relation_score=20, - reverse_only_relation_score=15, - single_word_score=10, - single_word_any_tag_score=5, - different_match_cutoff_score=10, - relation_matching_frequency_threshold=0.0, - embedding_matching_frequency_threshold=0.0, - use_frequency_factor=True) + + #matches = self.parser.topic_match_documents_against( + # text_to_match, + # word_embedding_match_threshold=.42, + # relation_score=20, + # reverse_only_relation_score=15, + # single_word_score=10, + # single_word_any_tag_score=5, + # different_match_cutoff_score=10, + # relation_matching_frequency_threshold=0.0, + # embedding_matching_frequency_threshold=0.0, + # use_frequency_factor=True) + + if text_to_match in self.documents: + matches = self.documents[text_to_match] + #TODO need an else block I think await websocket.send(json.dumps(matches)) # Holmes Extractor also has supervised topic model # building facilities using the functions @@ -332,7 +381,7 @@ async def run_parser(self, websocket, path): # building. 
elif command == 'AWE_INFO': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] indic = None itype = None summ = None @@ -385,7 +434,7 @@ async def run_parser(self, websocket, path): await websocket.send(json.dumps([])) elif command == 'DOCTOKENS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] if doc is not None: await websocket.send( doc._.AWE_Info(indicator='text')) @@ -393,7 +442,7 @@ async def run_parser(self, websocket, path): await websocket.send(json.dumps([])) elif command == 'DOCTOKENS_WITH_WS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] if doc is not None: await websocket.send( doc._.AWE_Info(indicator='text_with_ws')) @@ -403,28 +452,28 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] heads = [token.head.i for token in doc] await websocket.send(json.dumps(heads)) elif command == 'POS': # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] heads = [token.pos_ for token in doc] await websocket.send(json.dumps(heads)) elif command == 'DOCDEPENDENCIES': # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] deps = [token.dep_ for token in doc] await websocket.send(json.dumps(deps)) elif command == 'DOCENTITIES': # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] ents = [[ent.text, ent.start_char, ent.end_char, @@ -434,53 +483,53 @@ async def run_parser(self, websocket, path): # List returned contains lists pairing token # offset with token vectors cast as 
strings label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.token_vectors)) elif command == 'LEMMAS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(indicator='lemma_') ) elif command == 'STOPWORDS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(indicator='is_stop') ) elif command == 'WORDTYPES': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='lower_',filters=[('is_alpha', ['True']),('is_stop', ['False'])],summaryType = 'uniq') )) elif command == 'ROOTS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='root') )) elif command == 'SYLLABLES': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='nSyll'))) elif command == 'WORDLENGTH': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt']) )) elif command == 'LATINATES': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])]) )) elif command == 'ACADEMICS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])]) )) @@ -488,37 +537,37 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position 
# in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])]) )) elif command == 'LOGSENSENUMS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])],transformations=['log']) )) elif command == 'MORPHOLOGY': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='morphology') )) elif command == 'MORPHNUMS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='nMorph',filters=[('is_alpha', ['True'])]) )) elif command == 'HALROOTFREQS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])]) )) elif command == 'HALLOGROOTFREQS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log']) )) @@ -526,7 +575,7 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])]) )) @@ -534,7 +583,7 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( 
doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])]) )) @@ -542,7 +591,7 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='family_size',filters=[('is_alpha', ['True'])]) )) @@ -550,7 +599,7 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])]) )) @@ -558,28 +607,28 @@ async def run_parser(self, websocket, path): # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='lemma_freq'))) elif command == 'ROOTFREQS': # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='root_Freq'))) elif command == 'MAXFREQS': # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='max_freq'))) elif command == 'CONCRETES': # Position in the list returned equals position # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='concreteness'))) elif command == 'ABSTRACTTRAITS': @@ -587,7 +636,7 @@ async def run_parser(self, websocket, path): # in the document. 
Flag 1 if the word names an # abstract trait, 0 otherwise label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='abstract_trait'))) elif command == 'ANIMATES': @@ -595,7 +644,7 @@ async def run_parser(self, websocket, path): # in the document. Flag 1 if the word names an animate # entity, 0 otherwise label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='animate'))) elif command == 'LOCATIONS': @@ -603,7 +652,7 @@ async def run_parser(self, websocket, path): # in the document. Flag 1 if the word names an # animate entity, 0 otherwise label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='location'))) elif command == 'DEICTICS': @@ -611,14 +660,14 @@ async def run_parser(self, websocket, path): # the document. 
Flag 1 if the word names a deictic # element, 0 otherwise label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='deictic'))) elif command == 'PARAGRAPHS': # Items in the list indicate word offsets in the document # at which paragraph breaks appear label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='delimiter_n') @@ -628,7 +677,7 @@ async def run_parser(self, websocket, path): # Items in the list indicate word offsets in the document # at which paragraph breaks appear label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='sents') @@ -639,7 +688,7 @@ async def run_parser(self, websocket, path): # Items in the list indicate lengths of paragraphs listed # by offset in GETPARAGRAPHS label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen']) )) @@ -654,12 +703,12 @@ async def run_parser(self, websocket, path): # word the word string, its start and stop offsets, # and its transition word category. 
label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.transition_word_profile)) elif command == 'TRANSITIONS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType='Doc',indicator='transitions') )) @@ -667,7 +716,7 @@ async def run_parser(self, websocket, path): # List of cosine distances between ten-word windows # before and after a transition label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType='Doc',indicator='transition_distances') )) @@ -675,7 +724,7 @@ async def run_parser(self, websocket, path): # List of cosine distances between ten-word windows # before and after a sentence boundary label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType='Doc',indicator='intersentence_cohesions') )) @@ -683,14 +732,14 @@ async def run_parser(self, websocket, path): # List of cosine distances between ten-word windows # before and after a sliding window through the text label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType='Doc',indicator='sliding_window_cohesions') )) elif command == 'COREFCHAINS': # List of coreference chains found in document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.coref_chains)) elif command == 'RHEMEDEPTHS': @@ -698,7 +747,7 @@ async def run_parser(self, websocket, path): # sentence after the main verb where new information # is usually placed label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( 
doc._.AWE_Info(infoType='Doc',indicator='syntacticDepthsOfRhemes') )) @@ -707,7 +756,7 @@ async def run_parser(self, websocket, path): # of sentence before the main verb where given # information is usually placed label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='syntacticDepthsOfThemes') )) @@ -716,7 +765,7 @@ async def run_parser(self, websocket, path): # left-embedded structures # that tend to be harder to process label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='weightedSyntacticDepths') )) @@ -726,7 +775,7 @@ async def run_parser(self, websocket, path): # structures and loosely appended modifiers typical of # spoken, often unplanned sentence production label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='weightedSyntacticBreadths') )) @@ -738,7 +787,7 @@ async def run_parser(self, websocket, path): # complex sentence, and compound/complex sentence, in # that order. label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType='Doc',indicator='sentence_types') )) @@ -749,7 +798,7 @@ async def run_parser(self, websocket, path): # of speech, morphological categories, and syntactic # dependencies between specific parts of speech. label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.syntacticProfile)) elif command == 'NORMEDSYNTACTICPROFILE': @@ -757,7 +806,7 @@ async def run_parser(self, websocket, path): # frequency information (proportionas) for the # syntactic relations and categories in the text. 
label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.syntacticProfileNormed)) elif command == 'QUOTEDTEXT': @@ -765,7 +814,7 @@ async def run_parser(self, websocket, path): # Position in the list corresponds to offset of token # in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_quoted') )) @@ -793,7 +842,7 @@ async def run_parser(self, websocket, path): # of a pronominal reference chain that includes # the direct speech frame. label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech') )) @@ -802,7 +851,7 @@ async def run_parser(self, websocket, path): # 0 for other text. Position in the list corresponds to # offset of token in the document label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_in_direct_speech') )) @@ -812,54 +861,54 @@ async def run_parser(self, websocket, path): # with flag to indicate whether shift was to past # tense or to present tense. 
label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.vwp_tense_changes)) elif command == 'PERSPECTIVES': # list of positions where perspective is indicated label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_perspective') )) elif command == 'ATTRIBUTIONS': # list of positions where attribution is indicated label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_attribution') )) elif command == 'SOURCES': # list of positions where source is indicated label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_source') )) elif command == 'CITES': # list of positions where source is indicated label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_cite') )) elif command == 'STATEMENTSOFFACT': # list of positions where source is indicated label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_fact') )) elif command == 'STATEMENTSOFOPINION': # list of positions where source is indicated label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_opinion') )) elif command == 'PERSPECTIVESPANS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.vwp_perspective_spans)) await websocket.send( @@ -867,7 +916,7 @@ async def run_parser(self, websocket, path): ) elif command == 
'STANCEMARKERS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps(doc._.vwp_stance_markers)) await websocket.send( doc._.AWE_Info(infoType="Doc",indicator='vwp_stance_markers') @@ -875,154 +924,154 @@ async def run_parser(self, websocket, path): elif command == 'CLAIMTEXTS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_claim') )) elif command == 'DISCUSSIONTEXTS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_discussion') )) elif command == 'EMOTIONWORDS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_emotionword') )) elif command == 'CHARACTERWORDS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_character_traits') )) elif command == 'EMOTIONALSTATES': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(infoType="Doc",indicator='vwp_emotion_states') ) elif command == 'CHARACTERTRAITS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.vwp_character_traits)) elif command == 'PROPOSITIONALATTITUDES': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(infoType="Doc",indicator='vwp_propositional_attitudes') )) elif command == 'SOCIAL_AWARENESS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( 
doc._.AWE_Info(infoType="Doc",indicator='vwp_social_awareness') )) elif command == 'CONCRETEDETAILS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(indicator='concrete_detail') ) elif command == 'INTERACTIVELANGUAGE': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_interactive') )) elif command == 'ARGUMENTWORDS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_argumentword') )) elif command == 'ARGUMENTLANGUAGE': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_argumentation') )) elif command == 'EXPLICITARGUMENTWORDS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_explicit_argument') )) elif command == 'SUBJECTIVITYRATINGS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='subjectivity') )) elif command == 'SENTIMENTRATINGS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_sentiment') )) elif command == 'TONERATINGS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='vwp_tone') )) elif command == 'POLARITYRATINGS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='polarity') )) elif command == 'ASSESSMENTS': label = messagelist[1] - doc = 
self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps(doc._.assessments)) elif command == 'PASTTENSESCOPE': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='in_past_tense_scope') )) elif command == 'GOVERNINGSUBJECTS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='governing_subject') )) elif command == 'CLUSTERS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='clusterID') )) elif command == 'PROMPTLANGUAGE': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps(doc._.prompt_language)) elif command == 'PROMPTRELATED': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps(doc._.prompt_related)) elif command == 'MAINIDEAS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(infoType="Doc",indicator='main_ideas') ) elif command == 'SUPPORTINGIDEAS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(infoType="Doc",indicator='supporting_ideas') ) elif command == 'SUPPORTINGDETAILS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send( doc._.AWE_Info(infoType="Doc",indicator='supporting_details') ) @@ -1039,13 +1088,13 @@ async def run_parser(self, websocket, path): # 3. A list of the actual word strings in each cluster # 4. 
The offsets of the words assigned to each cluster label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps(doc._.clusterInfo)) elif command == 'DEVWORDS': # offset of the logical subject that governs # the domain this token belongs to label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps( doc._.AWE_Info(indicator='devword') )) @@ -1053,13 +1102,13 @@ async def run_parser(self, websocket, path): # offset of the logical subject that governs # the domain this token belongs to label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] await websocket.send(json.dumps(doc._.nominalReferences)) elif command == 'DOCSUMMARYLABELS': await websocket.send(json.dumps(self.summaryLabels)) elif command == 'DOCSUMMARYFEATS': label = messagelist[1] - doc = self.parser.get_document(label) + doc = self.documents[label] summaryFeats = [ doc._.AWE_Info(indicator='nSyll',summaryType="mean"), doc._.AWE_Info(indicator='nSyll',summaryType="median"), diff --git a/awe_workbench/web/parserServer.py:Zone.Identifier b/awe_workbench/web/parserServer.py:Zone.Identifier new file mode 100644 index 0000000..053d112 --- /dev/null +++ b/awe_workbench/web/parserServer.py:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet From 10fb9254cf2989f02e3cb947650e1db2c219560a Mon Sep 17 00:00:00 2001 From: askhan6 Date: Mon, 7 Oct 2024 15:38:45 -0400 Subject: [PATCH 29/39] refactored to (mostly) use a local dict for document storage --- awe_workbench/web/parserServer.py:Zone.Identifier | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 awe_workbench/web/parserServer.py:Zone.Identifier diff --git a/awe_workbench/web/parserServer.py:Zone.Identifier b/awe_workbench/web/parserServer.py:Zone.Identifier deleted file mode 100644 index 053d112..0000000 --- a/awe_workbench/web/parserServer.py:Zone.Identifier +++ 
/dev/null @@ -1,3 +0,0 @@ -[ZoneTransfer] -ZoneId=3 -HostUrl=about:internet From 1fa4f29d6c207776bc7b5d09a06bcc1f60a69b12 Mon Sep 17 00:00:00 2001 From: ArsalaanK7 <132946619+ArsalaanK7@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:24:29 -0400 Subject: [PATCH 30/39] Update parserServer.py --- awe_workbench/web/parserServer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index 1a1c6da..cc0f1c2 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -309,7 +309,7 @@ async def run_parser(self, websocket, path): if labels[i]in self.documents: del self.documents[labels[i]] - self.documents[labels[i]] = text + self.documents[labels[i]] = self.nlp(text) await websocket.send(json.dumps(True)) elif command == 'LABELS': # labels = self.parser.list_document_labels() @@ -1299,4 +1299,4 @@ async def run_parser(self, websocket, path): if __name__ == '__main__': print('parser server loading') - wsc = parserServer() \ No newline at end of file + wsc = parserServer() From e530d7f8d044302057d0ff1b0cf1fc05ebd2c0d6 Mon Sep 17 00:00:00 2001 From: ArsalaanK7 <132946619+ArsalaanK7@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:29:07 -0400 Subject: [PATCH 31/39] Update batch_summary.py Added synchronization to batchSummary to make merging dataframes work right --- examples/batch_summary.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/batch_summary.py b/examples/batch_summary.py index 5d19700..066953f 100644 --- a/examples/batch_summary.py +++ b/examples/batch_summary.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import asyncio import csv import os import math @@ -40,7 +41,7 @@ def initialize(): # return spellchecker and parser objects return cs, parser, lt -if __name__ == '__main__': +async def main(): parser = argparse.ArgumentParser(description="Parse a student text file") parser.add_argument( @@ -67,7 +68,7 @@ 
def initialize(): doc_contents.append(contents) print('Running LanguageTool') - df1 = lt.summarizeMultipleTexts(ids, doc_contents) + df1 = await lt.summarizeMultipleTexts(ids, doc_contents) #texts = None print('Running spellcorrect') @@ -117,3 +118,7 @@ def initialize(): dfFinal = pd.merge(df2, pd.merge(df1,syntactic_profile, on='ID'), on='ID') dfFinal.to_csv(args.directory + "/output.csv") + + +if __name__ == '__main__': + asyncio.run(main()) From ca93d0f552c7b7008b905c2dff7cf3d509c835d0 Mon Sep 17 00:00:00 2001 From: arsalaan Date: Sun, 27 Oct 2024 17:51:11 -0400 Subject: [PATCH 32/39] no meaningful changes, just removed unused imports --- awe_workbench/web/parserServer.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/awe_workbench/web/parserServer.py b/awe_workbench/web/parserServer.py index cc0f1c2..76b0c72 100644 --- a/awe_workbench/web/parserServer.py +++ b/awe_workbench/web/parserServer.py @@ -10,12 +10,6 @@ import coreferee import spacytextblob.spacytextblob -# commented out cause we dont use them anymore -# import holmes_extractor -# import holmes_extractor.manager -# import holmes_extractor.ontology -# from holmes_extractor.manager import Manager -# from holmes_extractor.ontology import Ontology from awe_components.components.utility_functions import content_pos import awe_components.components.lexicalFeatures import awe_components.components.syntaxDiscourseFeats From 12e2c8abe08379ae79a9c00a33f755728829bf66 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 31 Oct 2024 15:42:24 -0400 Subject: [PATCH 33/39] Removed unecessary files --- examples/temp/lion.txt | 9 + installation/conda_fresh_install.sh | 120 -- installation/install_template.sh | 18 - installation/venv_fresh_install.sh | 121 -- tests/old_tests/__init__.py | 0 tests/old_tests/test_awe_nlp.py | 261 ---- tests/old_tests/test_lexical_clusters.py | 50 - tests/old_tests/test_lexical_features.py | 457 ------- .../test_prompt_specific_features.py | 53 - tests/old_tests/test_server_api.py 
| 1178 ----------------- .../test_syntax_discourse_features.py | 434 ------ .../test_viewpoint_perspective_features.py | 101 -- .../test_viewpoint_perspective_features2.py | 249 ---- .../test_viewpoint_perspective_features3.py | 248 ---- .../test_viewpoint_perspective_features4.py | 245 ---- .../test_viewpoint_perspective_features5.py | 246 ---- .../test_viewpoint_perspective_features6.py | 242 ---- .../test_viewpoint_perspective_features7.py | 214 --- 18 files changed, 9 insertions(+), 4237 deletions(-) create mode 100644 examples/temp/lion.txt delete mode 100644 installation/conda_fresh_install.sh delete mode 100644 installation/install_template.sh delete mode 100644 installation/venv_fresh_install.sh delete mode 100644 tests/old_tests/__init__.py delete mode 100644 tests/old_tests/test_awe_nlp.py delete mode 100644 tests/old_tests/test_lexical_clusters.py delete mode 100644 tests/old_tests/test_lexical_features.py delete mode 100644 tests/old_tests/test_prompt_specific_features.py delete mode 100644 tests/old_tests/test_server_api.py delete mode 100644 tests/old_tests/test_syntax_discourse_features.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features2.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features3.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features4.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features5.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features6.py delete mode 100644 tests/old_tests/test_viewpoint_perspective_features7.py diff --git a/examples/temp/lion.txt b/examples/temp/lion.txt new file mode 100644 index 0000000..214ac6d --- /dev/null +++ b/examples/temp/lion.txt @@ -0,0 +1,9 @@ +A lion lay asleep in the forest, his great head resting on his paws. A timid little mouse came upon him unexpectedly, and in her fright and haste to get away, ran across the lion's nose. 
Roused from his nap, the lion laid his huge paw angrily on the tiny creature to kill her. + +"Spare me!" begged the poor mouse. "Please let me go and some day I will surely repay you." + +The lion was much amused to think that a mouse could ever help him. But he was generous and finally let the mouse go. + +Some days later, while stalking his prey in the forest, the lion was caught in the coils of a hunter's net. Unable to free himself, he filled the forest with his angry roaring. The mouse knew the voice and quickly found the lion struggling in the net. Running to one of the great ropes that bound him, she gnawed it until it parted, and soon the lion was free. + +"You laughed when I said I would repay you," said the Mouse. "Now you see that even a Mouse can help a Lion." diff --git a/installation/conda_fresh_install.sh b/installation/conda_fresh_install.sh deleted file mode 100644 index faa97a9..0000000 --- a/installation/conda_fresh_install.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env bash - -# AWE Workbench Install Script (for conda) -# Author: Caleb Scott - -# This script installs all necessary dependencies for AWE -# We make the following assumptions: -# * You have a working conda environment running on python 3.12. -# * You have pip installed in the conda environment. -# (you can do this by 'conda create --name XXXX python=3.12 pip') - -# Sanity Check: let the user know the preconditions. - -NEW_DIR=${0:-"arglab-dev-py312"} -NEW_CONDA_ENV=${1:-"noconda"} -NEW_GIT=${2:-"nogit"} -BRANCH=${3:-"main"} -PROTOBUF=${4:-"noproto"} -DATA=${5:-"data"} -NEW_JAVA=${6:-"nojava"} - -echo "============================ WARNING ==============================" -echo "\nYou are about to install AWE on this system." -echo "\n* Ensure that you are using a python3.12 version." 
-echo "\n* The following repos are needed: " -echo "\n > coreferee" -echo "\n > holmes-extractor-expandable" -echo "\n > AWE_LanguageTool" -echo "\n > AWE_SpellCorrect" -echo "\n > AWE_Lexica" -echo "\n > AWE_Components" -echo "\n > AWE_Workbench" -echo "\n" -echo "\nUsage: " -echo "\n ./conda_fresh_install.sh [DIR] [CONDA] [GIT] [PROTOBUF] [DATA] [JAVA]" -echo "\n NEW_DIR: [XXXX/nodir]" -echo "\n CONDA: [conda/noconda]" -echo "\n GIT: [git/nogit]" -echo "\n BRANCH: [XXXX/main]" -echo "\n PROTOBUF: [proto/noproto]" -echo "\n DATA: [data/nodata]" -echo "\n JAVA: [java/nojava]" -echo "\n===================================================================" - -read -p "\n\n Continue [Y/N]? " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]] -then - # Set up new dir and cd to it - if [ $NEW_DIR -ne "nodir" ] - then - echo "Setting up new dev directory..." - mkdir $NEW_DIR - cd $NEW_DIR - fi - - # Set up new conda env - if [[ $NEW_CONDA_ENV =~ ^conda$ ]] - then - echo "Setting up new conda env..." - conda create --name arglab-dev-py312 python=3.12 pip - fi - - # Activate the env - echo "Activating conda env..." - conda activate arglab-dev-py312 - - # Download relevant github repos - if [[ $NEW_GIT =~ ^git$ ]] - then - echo "Pulling github repos..." - git clone -b $BRANCH git@github.com:ArgLab/coreferee.git - git clone -b $BRANCH git@github.com:ArgLab/holmes-extractor-expandable.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_LanguageTool.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_Lexica.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_Components.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_Workbench.git - fi - - # Install repos - echo "Installing repos..." - pip install -e ./coreferee - pip install -e ./holmes-extractor-expandable - pip install -e ./AWE_LanguageTool - pip install -e ./AWE_SpellCorrect - pip install -e ./AWE_Components - pip install -e ./AWE_Workbench - - # PROTOBUF fix? 
- if [[ $PROTOBUF =~ ^proto$ ]] - then - pip install protobuf==3.20.0 - fi - - # Download data - if [[ $DATA =~ ^data$ ]] - then - echo "Downloading data..." - python -m awe_workbench.setup.data --develop - fi - - # Install java - if [[ $JAVA =~ ^java$ ]] - then - echo "Installing java sdk..." - # Source: https://askubuntu.com/questions/1279677/how-to-install-openjdk-14-jdk-on-ubuntu-16-04 - wget https://download.java.net/java/GA/jdk14.0.2/205943a0976c4ed48cb16f1043c5c647/12/GPL/openjdk-14.0.2_linux-x64_bin.tar.gz - - tar xvf openjdk-14.0.2_linux-x64_bin.tar.gz - - mv jdk-14.0.2 /usr/lib/jvm - - update-alternatives --install "/usr/bin/javac" "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" 3 - update-alternatives --install "/usr/bin/java" "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" 3 - update-alternatives --set "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" - update-alternatives --set "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" - - update-alternatives --config java - fi -fi diff --git a/installation/install_template.sh b/installation/install_template.sh deleted file mode 100644 index d510b74..0000000 --- a/installation/install_template.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -# AWE Workbench Install Script Template -# Author: Caleb Scott - -# This script shows how to use the command-line arguments for the fresh_install.sh scripts. 
- -# CASE 1: no dir, no env, no git, main branch, no proto, no data, no java (conda env) -./conda_fresh_install.sh arglab-dev-py312 conda git main proto data java - -# CASE 1 (venv) -# ./fresh_install.sh arglab-dev-py312 venv git main proto data java - -# CASE 2: dir + env + git, main branch, no proto, no data, java (conda env) -./conda_fresh_install.sh nodir noconda nogit main proto data nojava - -# CASE 2 (venv) -# ./fresh_install.sh nodir novenv nogit main proto data nojava diff --git a/installation/venv_fresh_install.sh b/installation/venv_fresh_install.sh deleted file mode 100644 index 83d2715..0000000 --- a/installation/venv_fresh_install.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env bash - -# AWE Workbench Install Script (for python venv) -# Author: Caleb Scott - -# This script installs all necessary dependencies for AWE -# We make the following assumptions: -# * You are using python3.12 - -# Sanity Check: let the user know the preconditions. - -NEW_DIR=${0:-"arglab-dev-py312"} -NEW_CONDA_ENV=${1:-"novenv"} -NEW_GIT=${2:-"nogit"} -BRANCH=${3:-"main"} -PROTOBUF=${4:-"noproto"} -DATA=${5:-"data"} -NEW_JAVA=${6:-"nojava"} - -echo "============================ WARNING ==============================" -echo "\nYou are about to install AWE on this system." -echo "\n* Ensure that you are using a python3.12 version." 
-echo "\n* The following repos are needed: " -echo "\n > coreferee" -echo "\n > holmes-extractor-expandable" -echo "\n > AWE_LanguageTool" -echo "\n > AWE_SpellCorrect" -echo "\n > AWE_Lexica" -echo "\n > AWE_Components" -echo "\n > AWE_Workbench" -echo "\n" -echo "\nUsage: " -echo "\n ./conda_fresh_install.sh [DIR] [VENV] [GIT] [PROTOBUF] [DATA] [JAVA]" -echo "\n NEW_DIR: [XXXX/nodir]" -echo "\n VENV: [venv/novenv]" -echo "\n GIT: [git/nogit]" -echo "\n BRANCH: [XXXX/main]" -echo "\n PROTOBUF: [proto/noproto]" -echo "\n DATA: [data/nodata]" -echo "\n JAVA: [java/nojava]" -echo "\n===================================================================" - -read -p "\n\n Continue [Y/N]? " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]] -then - # Set up new dir and cd to it - if [ $NEW_DIR -ne "nodir" ] - then - echo "Setting up new dev directory..." - mkdir $NEW_DIR - cd $NEW_DIR - fi - - # Set up new conda env - if [[ $NEW_CONDA_ENV =~ ^venv$ ]] - then - echo "Setting up new venv env..." - python -m venv $NEW_DIR-env - fi - - # Activate the env - echo "Activating venv env..." - $NEW_DIR-env/bin/activate - - # Download relevant github repos - if [[ $NEW_GIT =~ ^git$ ]] - then - echo "Pulling github repos..." - git clone -b $BRANCH git@github.com:ArgLab/coreferee.git - git clone -b $BRANCH git@github.com:ArgLab/holmes-extractor-expandable.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_LanguageTool.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_Lexica.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_Components.git - git clone -b $BRANCH git@github.com:ArgLab/AWE_Workbench.git - fi - - # Update pip - pip install pip --upgrade - - # Install repos - echo "Installing repos..." - pip install -e ./coreferee - pip install -e ./holmes-extractor-expandable - pip install -e ./AWE_LanguageTool - pip install -e ./AWE_SpellCorrect - pip install -e ./AWE_Components - pip install -e ./AWE_Workbench - - # PROTOBUF fix? 
- if [[ $PROTOBUF =~ ^proto$ ]] - then - pip install protobuf==3.20.0 - fi - - # Download data - if [[ $DATA =~ ^data$ ]] - then - echo "Downloading data..." - python -m awe_workbench.setup.data --develop - fi - - # Install java - if [[ $JAVA =~ ^java$ ]] - then - echo "Installing java sdk..." - # Source: https://askubuntu.com/questions/1279677/how-to-install-openjdk-14-jdk-on-ubuntu-16-04 - wget https://download.java.net/java/GA/jdk14.0.2/205943a0976c4ed48cb16f1043c5c647/12/GPL/openjdk-14.0.2_linux-x64_bin.tar.gz - - tar xvf openjdk-14.0.2_linux-x64_bin.tar.gz - - mv jdk-14.0.2 /usr/lib/jvm - - update-alternatives --install "/usr/bin/javac" "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" 3 - update-alternatives --install "/usr/bin/java" "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" 3 - update-alternatives --set "javac" "/usr/lib/jvm/jdk-14.0.2/bin/javac" - update-alternatives --set "java" "/usr/lib/jvm/jdk-14.0.2/bin/java" - - update-alternatives --config java - fi -fi diff --git a/tests/old_tests/__init__.py b/tests/old_tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/old_tests/test_awe_nlp.py b/tests/old_tests/test_awe_nlp.py deleted file mode 100644 index bf64e86..0000000 --- a/tests/old_tests/test_awe_nlp.py +++ /dev/null @@ -1,261 +0,0 @@ -""" ---- [ Test: test_awe_nlp.py ] ----------------------------------------------------------- - -Set of corresponding tests for document features found in awe_nlp.py of writingobserver. 
- -Author: Caleb Scott (cwscott3@ncsu.edu) - ------------------------------------------------------------------------------------------ -""" - -# --- [ IMPORTS ] ----------------------------------------------------------------------- - -import unittest -import json -import spacy -import coreferee -import spacytextblob.spacytextblob - -import awe_components.components.lexicalFeatures -import awe_components.components.syntaxDiscourseFeats -import awe_components.components.viewpointFeatures -import awe_components.components.lexicalClusters -import awe_components.components.contentSegmentation - -from awe_workbench.pipeline import pipeline_def -from examples.essays.essays import get_essay - -# --- [ CONSTS/VARS ] ------------------------------------------------------------------- - -SPACY_MODEL = 'en_core_web_lg' - -COMPONENTS = [el['component'] for el in pipeline_def] - -TEST_TEXT = "gre6.txt" - -# --- [ CLASSES ] ----------------------------------------------------------------------- - -class AWENLPTest(unittest.TestCase): - - def setUp(self): - """ - This is the basic initializer for all test classes. - - Sets up the spacy pipeline. 
- """ - # Initialize the pipeline - try: - self.nlp = spacy.load(SPACY_MODEL) - for comp in COMPONENTS: - self.nlp.add_pipe(comp) - except OSError as e: - print("There was an error loading 'en_core_web_lg' from spacy.") - raise OSError() from e - - # Now get the text - self.doc = self.nlp(get_essay(TEST_TEXT)) - - def test_is_academic(self): - self.assertEqual(self.doc._.AWE_Info(indicator='is_academic',summaryType='percent'), 22) - - def test_vwp_interactive_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_interactive',summaryType='percent'), 4) - - def test_is_latinate(self): - self.assertEqual(self.doc._.AWE_Info(indicator='is_latinate',summaryType='percent'), 13) - - def test_vwp_evaluation_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_evaluation',summaryType='total'), 704) - - def test_vwp_emotionword_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_emotionword',summaryType='percent'), 2) - - def test_vwp_argumentword_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_argumentword',summaryType='percent'), 100) - - def test_vwp_explicit_argument_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_explicit_argument',summaryType='percent'), 15) - - def test_vwp_statements_of_opinion_percent(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_opinion',summaryType='percent'), 78) - - def test_vwp_statements_of_fact_percent(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_statements_of_fact',summaryType='percent'), 22) - - def test_transitions_counts(self): - counts_dict = json.loads(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',summaryType='counts')) - self.assertEqual(sum(list(counts_dict.values())), 25) - - def test_transitions_positive_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['positive'])],summaryType='total'), 0) - - def 
test_transitions_conditional_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['conditional'])],summaryType='total'), 0) - - def test_transitions_consequential_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['consequential'])],summaryType='total'), 0) - - def test_transitions_contrastive_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['contrastive'])],summaryType='total'), 5) - - def test_transitions_counterpoint_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['counterpoint'])],summaryType='total'), 0) - - def test_transitions_comparative_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['comparative'])],summaryType='total'), 1) - - def test_transitions_crossreferential_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['crossreferential'])],summaryType='total'), 0) - - def test_transitions_illustrative_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['illustrative'])],summaryType='total'), 6) - - def test_transitions_negative_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['negative'])],summaryType='total'), 0) - - def test_transitions_emphatic_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['emphatic'])],summaryType='total'), 2) - - def test_transitions_evidentiary_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['evidentiary'])],summaryType='total'), 0) - - def test_transitions_general_total(self): - 
self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['general'])],summaryType='total'), 0) - - def test_transitions_ordinal_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['ordinal'])],summaryType='total'), 0) - - def test_transitions_purposive_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['purposive'])],summaryType='total'), 0) - - def test_transitions_periphrastic_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['periphrastic'])],summaryType='total'), 0) - - def test_transitions_hypothetical_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['hypothetical'])],summaryType='total'), 0) - - def test_transitions_summative_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['summative'])],summaryType='total'), 0) - - def test_transitions_introductory_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='transitions',filters=[('==', ['introductory'])],summaryType='total'), 5) - - def test_pos_adj_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADJ'])],summaryType='total'), 62) - - def test_pos_adv_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADV'])],summaryType='total'), 23) - - def test_pos_noun_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['NOUN'])],summaryType='total'), 189) - - def test_pos_propn_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['PROPN'])],summaryType='total'), 13) - - def test_pos_verb_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['VERB'])],summaryType='total'), 78) - - def test_pos_num_total(self): - 
self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['NUM'])],summaryType='total'), 2) - - def test_pos_adp_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['ADP'])],summaryType='total'), 81) - - def test_pos_cconj_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['CCONJ'])],summaryType='total'), 14) - - def test_pos_sconj_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['SCONJ'])],summaryType='total'), 17) - - def test_pos_aux_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['AUX'])],summaryType='total'), 36) - - def test_pos_pron_total(self): - self.assertEqual(self.doc._.AWE_Info(indicator='pos_',filters=[('==', ['PRON'])],summaryType='total'), 22) - - def test_sentence_types_counts(self): - types_dict = json.loads(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',summaryType='counts')) - self.assertEqual(sum(list(types_dict.values())), 35) - - def test_sentence_types_simple_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Simple'])],summaryType='total'), 13) - - def test_sentence_types_simple_complex_pred_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleComplexPred'])],summaryType='total'), 3) - - def test_sentence_types_simple_compound_pred_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundPred'])],summaryType='total'), 0) - - def test_sentence_types_simple_compound_complex_pred_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['SimpleCompoundComplexPred'])],summaryType='total'), 0) - - def test_sentence_types_compound_total(self): - 
self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Compound'])],summaryType='total'), 2) - - def test_sentence_types_complex_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['Complex'])],summaryType='total'), 16) - - def test_sentence_types_compound_complex_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sentence_types',filters=[('==', ['CompoundComplex'])],summaryType='total'), 1) - - def test_vwp_source_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_source',summaryType='percent'), 0) - - def test_vwp_attribution_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_attribution',summaryType='percent'), 0) - - def test_vwp_cite_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_cite',summaryType='percent'), 0) - - def test_vwp_quoted_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_quoted',summaryType='percent'), 0) - - def test_vwp_direct_speech_percent(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_direct_speech',summaryType='percent'), 0) - - def test_vwp_in_direct_speech_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_in_direct_speech',summaryType='percent'), 0) - - def test_vwp_tone_greater_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_tone',filters=[('>', [0.4])],summaryType='percent'), 1) - - def test_vwp_tone_lesser_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_tone',filters=[('<', [-0.4])],summaryType='percent'), 2) - - def test_concrete_details_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='concrete_details',summaryType='percent'), 2) - - def test_main_ideas_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='main_ideas',summaryType='total'), 9) - - def test_supporting_ideas_total(self): - 
self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='supporting_ideas',summaryType='total'), 11) - - def test_supporting_details_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='supporting_details',summaryType='total'), 6) - - def test_nSyll_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='nSyll',filters=[('>', [3])],summaryType='percent'), 10) - - def test_max_freq_lesser_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='max_freq',filters=[('<', [4])],summaryType='percent'), 9) - - def test_sents_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='sents',summaryType='total'), 35) - - def test_delimiter_n_total(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='delimiter_n',summaryType='total'), 223) - - def test_vwp_character_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='vwp_character',summaryType='percent'), 2) - - def test_in_past_tense_scope_percent(self): - self.assertEqual(self.doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='percent'), 33) - - def test_vwp_propositional_attitudes_percent(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_propositional_attitudes',summaryType='percent'), 53) - - def test_vwp_social_awareness_percent(self): - self.assertEqual(self.doc._.AWE_Info(infoType='Doc',indicator='vwp_social_awareness',summaryType='percent'), 3) - -# --- [ END ] --------------------------------------------------------------------------- diff --git a/tests/old_tests/test_lexical_clusters.py b/tests/old_tests/test_lexical_clusters.py deleted file mode 100644 index 5081323..0000000 --- a/tests/old_tests/test_lexical_clusters.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 
'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. 
In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. 
Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", label='GRE_Sample_Essay') - - -class LexicalFeatureTest(unittest.TestCase): - - def test_clusterInfo(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - clusterInfo = [[14, 3.6747192922762837, ['technology', 'emerge', 'market', 'Technology', 'nanotechnology'], [3, 46, 161, 212, 229, 247, 286, 307, 333, 334, 405, 537, 552, 589, 606, 623, 640, 695]], [51, 2.7333009865760953, ['thinking', 'assumption', 'argue', 'hypothetical', 'fact', 'consider', 'layman', 'actually', 'imagine'], [7, 154, 183, 196, 216, 284, 411, 459, 468, 538, 561, 594, 598]], [33, 2.6008002462296087, ['human', 'earth', 'humanity'], [11, 112, 147, 239, 277, 288, 408, 545, 579, 595, 628, 647, 701]], [31, 2.4556616643929057, ['computer', 'wireless', 'network', 'mobile', 'phone', 'communication', 'digital', 'internet'], [82, 105, 106, 189, 192, 193, 210, 255, 257, 414, 421, 435]], [43, 2.108276939094222, ['century', 'history', '19th', 'obsolete', '20th', 'prehistorical'], [16, 25, 128, 129, 221, 565, 566, 582, 644]], [8, 2.0428751576292563, ['creativity', 'create', 'imagination', 'idea', 'inspiration'], [235, 299, 356, 386, 409, 429, 475, 500, 702]], [39, 1.82370820668693, ['smallpox', 'ravage', 'vaccine', 'eradicate'], [572, 577, 591, 603, 615, 620]], [35, 1.7955112219451372, ['reliance', 'negate', 'preclude'], [159, 162, 227, 233, 273, 550]], [32, 1.7208413001912044, ['free', 'information', 'exchange', 'portal', 'visit'], [6, 85, 207, 427, 436, 439, 452, 593, 
601]], [50, 1.5462672143029716, ['recent', 'past', 'change', 'previous', 'prior', 'late', 'day'], [10, 15, 33, 139, 172, 243, 564, 583]], [5, 1.5203226807322372, ['utilize', 'ability', 'convenience', 'provide', 'complete'], [108, 145, 250, 531, 539, 569, 696]], [20, 1.5090853095164767, ['dramatically', 'reach', 'increase', 'achieve'], [35, 103, 158, 329, 471, 548, 559]], [22, 1.5081563558017852, ['quandary', 'problem', 'issue', 'concern'], [173, 220, 294, 301, 340, 382, 659]], [37, 1.4794685990338161, ['medical', 'doctor', 'emergency', 'care', 'triage', 'medicine'], [438, 451, 467, 502, 503, 504, 513]], [15, 1.4314115308151094, ['revolutionize', 'introduction', 'introduce', 'generation', 'revolution', 'advent'], [48, 179, 317, 349, 415, 418]], [49, 1.3787281935846938, ['typical', 'common', 'example', 'popular', 'base'], [42, 117, 244, 310, 434, 498, 530]], [12, 1.2813941568426448, ['inconceivable', 'surprising', 'unlikely', 'impossible'], [122, 341, 481, 508, 554]], [29, 1.2761613067891782, ['allow', 'free', 'permit'], [248, 287, 367, 406, 441]], [19, 1.2591815320041972, ['play', 'run', 'chance', 'attempt', 'Goals', 'goal'], [8, 61, 72, 134, 496, 555]], [34, 1.1895910780669146, ['oppress', 'unfettered', 'dare', 'embrace'], [371, 385, 596, 687]], [64, 1.1611030478955007, ['silicon', 'micron', 'satellite', 'orbit'], [87, 92, 109, 110]], [4, 1.1552680221811462, ['link', 'bridge', 'complex', 'tense'], [2, 88, 136, 381, 516]], [3, 1.142595978062157, ['process', 'method', 'processing', 'circuit', 'technique'], [84, 203, 208, 214, 505]], [11, 1.086484137331595, ['automobile', 'car', 'transport'], [59, 187, 205, 253, 315]], [62, 1.053324555628703, ['proliferation', 'dependence', 'dependent', 'pathway'], [313, 352, 363, 457]], [46, 1.0183299389002036, ['South', 'Eastern', 'battle', 'fire', 'retreat'], [521, 524, 608, 635, 676]], [52, 1.008827238335435, ['commute', 'workday', 'leave', 'arrive'], [54, 70, 96, 477]], [41, 0.9193054136874362, ['WebMD', 'Jeffrey', 'Sachs'], 
[431, 484, 485]], [6, 0.9153318077803205, ['wide', 'tackle', 'target'], [93, 292, 618, 669]], [42, 0.9153318077803203, ['UN', 'Millenium', 'America', 'Europe'], [493, 494, 522, 525]], [60, 0.9024252679075014, ['scientist', 'research', 'interdisciplinary', 'discovery'], [389, 445, 472, 633]], [58, 0.898876404494382, ['surely', 'necessarily', 'exist', 'likewise'], [18, 232, 305, 351]], [16, 0.8923591745677636, ['reduction', 'efficiency', 'energy', 'elimination'], [142, 270, 330, 570]], [38, 0.8913649025069637, ['patient', 'symptom', 'heal', 'disease'], [442, 446, 515, 575]], [36, 0.8810572687224669, ['self', 'exercise', 'attitude', 'posture'], [444, 455, 680, 690]], [10, 0.8771929824561403, ['horse', 'buggy', 'race'], [344, 348, 580, 648]], [57, 0.8645533141210374, ['creatively', 'efficiently', 'systematically'], [169, 267, 617]], [2, 0.8560727661851258, ['reveal', 'release', 'previously'], [44, 245, 259, 462]], [18, 0.8398950131233597, ['global', 'warming', 'oil'], [325, 336, 337, 354]], [45, 0.8376963350785341, ['nation', 'state', 'rule', 'minority'], [357, 359, 368, 372]], [17, 0.8341056533827617, ['taxation', 'economic', 'hyperinflation'], [365, 511, 518]], [26, 0.8264462809917356, ['statement', 'informed'], [1, 133, 398, 450]], [30, 0.8209338122113904, ['additional', 'require', 'allow', 'limit'], [260, 383, 423, 654]], [44, 0.8040201005025126, ['member', 'party', 'group', 'politician'], [100, 369, 373, 391]], [24, 0.8016032064128256, ['Development', 'future', 'plan', 'implementation'], [495, 543, 609, 638]], [56, 0.7778317938745746, ['mark', 'give', 'number'], [237, 626, 642, 657]], [13, 0.7679180887372014, ['alternate', 'possibility', 'avenue'], [202, 693, 699]], [61, 0.7563025210084033, ['negatively', 'interact', 'interaction'], [4, 79, 473]], [48, 0.7540056550424129, ['live', 'life', 'family'], [28, 29, 99, 265]], [7, 0.7469654528478057, ['quick', 'look', 'short', 'make'], [38, 175, 213, 218]], [9, 0.72, ['corner', 'architect', 'draw'], [482, 490, 611]], [21, 
0.6891271056661562, ['dramatic', 'scale', 'unprecedented'], [138, 326, 426]], [27, 0.6854531607006855, ['combustion', 'engine', 'fuel'], [65, 66, 321]], [63, 0.6772009029345373, ['reflection', 'internal', 'contrast'], [39, 64, 395]], [23, 0.6751687921980495, ['solve', 'demand', 'solution'], [171, 331, 378]], [54, 0.6181318681318683, ['experience', 'environment'], [12, 519, 629]], [28, 0.614334470989761, ['turn', 'open', 'close'], [125, 456, 463]], [25, 0.6134969325153373, ['hope', 'hopeful'], [534, 540, 689]]] - self.assertEqual(doc._.clusterInfo, clusterInfo) - - def test_mean_main_cluster_span(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_main_cluster_span, - 199.44444444444446) - - def test_median_main_cluster_span(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_main_cluster_span, 187.0) - - def test_max_main_cluster_span(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_main_cluster_span, 375) - - def test_min_main_cluster_span(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_main_cluster_span, 3) - - def test_stdev_main_cluster_span(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_main_cluster_span, - 108.55696375214704) - - def test_devwords(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - devlist = [token._.devword for token in doc] - devwords = [False, True, True, False, True, False, True, False, True, False, True, False, True, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, True, True, False, False, False, True, False, True, False, False, True, True, False, False, True, False, True, False, False, False, True, False, False, False, False, False, True, False, False, False, False, True, False, True, False, False, True, True, True, False, False, False, True, False, True, 
False, False, False, False, True, False, True, False, False, False, False, True, True, False, True, True, False, False, False, True, True, False, False, True, False, False, True, True, False, False, True, False, False, False, False, True, True, True, False, False, False, False, False, False, True, True, False, False, False, True, False, False, True, False, False, False, False, False, False, False, True, True, False, True, False, True, True, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, True, False, False, False, False, False, False, True, False, True, True, True, False, True, False, False, False, True, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, True, True, False, True, False, True, True, False, False, False, False, True, True, False, False, False, True, False, True, False, False, False, False, False, False, True, False, False, False, False, True, True, False, True, False, True, False, False, True, False, False, True, True, True, False, False, True, False, True, False, False, True, False, False, False, False, False, True, True, False, False, False, False, True, False, True, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, True, False, True, False, False, False, False, True, False, True, False, False, False, True, False, False, False, False, True, False, False, True, False, True, False, True, False, False, False, True, True, False, False, True, True, False, False, True, True, True, False, False, False, False, True, True, False, False, True, True, False, False, True, False, False, False, 1, True, False, True, True, False, True, False, True, True, False, True, False, False, False, True, False, True, False, True, True, True, False, True, True, 
True, False, False, True, False, True, False, False, True, True, True, False, True, True, False, True, True, False, True, False, False, False, True, False, False, True, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, True, False, False, True, False, False, False, False, True, False, False, True, True, False, True, False, True, False, False, True, False, True, False, True, True, False, True, True, False, True, True, True, False, False, False, True, True, True, False, False, True, True, True, False, False, False, False, True, True, False, False, False, True, False, False, False, True, True, True, False, True, False, True, False, False, False, True, True, False, True, True, False, False, False, False, True, False, False, True, True, True, True, False, True, False, True, False, True, True, True, True, False, False, True, True, False, True, False, True, False, True, True, False, True, True, False, True, True, False, True, True, False, False, False, False, True, True, False, False, True, False, False, False, False, True, True, False, False, True, False, False, False, False, True, False, True, False, False, False, True, True, False, False, False, True, False, False, False, False, True, False, False, True, False, True, True, False, True, False, False, True, False, True, False, False, True, False, False, True, False, False, False, False, False, False, False, True, False, True, False, False, True, False, False, False, False, True, False, True, False, False, False, False, True, True, False, True, False, False, False, True, False, True, True, False, True, False, False, False, False, False, True, False, False, True, False, False, False, True, False, True, False, False, True, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, True, False, False, True, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, 
False, True, False, False, True, True, False, False, False, False, False, False, True, False, True, True, False, False, True, False, False, True, False, False, True, False, False, True, False] - self.assertEqual(devlist, devwords) - diff --git a/tests/old_tests/test_lexical_features.py b/tests/old_tests/test_lexical_features.py deleted file mode 100644 index 7002cb4..0000000 --- a/tests/old_tests/test_lexical_features.py +++ /dev/null @@ -1,457 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. 
Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. 
The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", label='GRE_Sample_Essay') - - -class LexicalFeatureTest(unittest.TestCase): - - def test_lemmas(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - lemmas = ['the', 'statement', 'link', 'technology', 'negatively', 'with', 'free', 'thinking', 'play', 'on', 'recent', 'human', 'experience', 'over', 'the', 'past', 'century', None, 'surely', 'there', 'have', 'be', 'no', 'time', 'in', 'history', 'where', 'the', 'live', 'life', 'of', 'people', 'have', 'change', 'more', 'dramatically', None, 'a', 'quick', 'reflection', 'on', 'a', 'typical', 'day', 'reveal', 'how', 'technology', 'have', 'revolutionize', 'the', 'world', None, 'Most', 'people', 'commute', 'to', 'work', 'in', 'an', 'automobile', 'that', 'run', 'on', 'an', 'internal', 'combustion', 'engine', None, 'during', 'the', 'workday', None, 'chance', 'be', 'high', 'that', 'the', 'employee', 'will', 'interact', 
'with', 'a', 'computer', 'that', 'process', 'information', 'on', 'silicon', 'bridge', 'that', 'be', None, 'micron', 'wide', None, 'upon', 'leave', 'home', None, 'family', 'member', 'will', 'be', 'reach', 'through', 'wireless', 'network', 'that', 'utilize', 'satellite', 'orbit', 'the', 'earth', None, 'each', 'of', 'these', 'common', 'occurrence', 'could', 'have', 'be', 'inconceivable', 'at', 'the', 'turn', 'of', 'the', '19th', 'century', None, None, 'the', 'statement', 'attempt', 'to', 'bridge', 'these', 'dramatic', 'change', 'to', 'a', 'reduction', 'in', 'the', 'ability', 'for', 'human', 'to', 'think', 'for', 'themselves', None, 'the', 'assumption', 'be', 'that', 'an', 'increase', 'reliance', 'on', 'technology', 'negate', 'the', 'need', 'for', 'people', 'to', 'think', 'creatively', 'to', 'solve', 'previous', 'quandary', None, 'look', 'back', 'at', 'the', 'introduction', None, 'one', 'could', 'argue', 'that', 'without', 'a', 'car', None, 'computer', None, 'or', 'mobile', 'phone', None, 'the', 'hypothetical', 'worker', 'would', 'need', 'to', 'find', 'alternate', 'method', 'of', 'transport', None, 'information', 'processing', 'and', 'communication', None, 'technology', 'short', 'circuit', 'this', 'thinking', 'by', 'make', 'the', 'problem', 'obsolete', None, None, 'however', None, 'this', 'reliance', 'on', 'technology', 'do', 'not', 'necessarily', 'preclude', 'the', 'creativity', 'that', 'mark', 'the', 'human', 'specie', None, 'the', 'prior', 'example', 'reveal', 'that', 'technology', 'allow', 'for', 'convenience', None, 'the', 'car', None, 'computer', 'and', 'phone', 'all', 'release', 'additional', 'time', 'for', 'people', 'to', 'live', 'more', 'efficiently', None, 'this', 'efficiency', 'do', 'not', 'preclude', 'the', 'need', 'for', 'human', 'to', 'think', 'for', 'themselves', None, 'in', 'fact', None, 'technology', 'free', 'humanity', 'to', 'not', 'only', 'tackle', 'new', 'problem', None, 'but', 'may', 'itself', 'create', 'new', 'issue', 'that', 'do', 'not', 'exist', 
'without', 'technology', None, 'for', 'example', None, 'the', 'proliferation', 'of', 'automobile', 'have', 'introduce', 'a', 'need', 'for', 'fuel', 'conservation', 'on', 'a', 'global', 'scale', None, 'with', 'increase', 'energy', 'demand', 'from', 'emerge', 'market', None, 'global', 'warming', 'become', 'a', 'concern', 'inconceivable', 'to', 'the', 'horse', None, 'and', None, 'buggy', 'generation', None, 'likewise', 'dependence', 'on', 'oil', 'have', 'create', 'nation', None, 'state', 'that', 'be', 'not', 'dependent', 'on', 'taxation', None, 'allow', 'rule', 'party', 'to', 'oppress', 'minority', 'group', 'such', 'as', 'woman', None, 'solution', 'to', 'these', 'complex', 'problem', 'require', 'the', 'unfettered', 'imagination', 'of', 'maverick', 'scientist', 'and', 'politician', None, None, 'in', 'contrast', 'to', 'the', 'statement', None, 'we', 'can', 'even', 'see', 'how', 'technology', 'free', 'the', 'human', 'imagination', None, 'consider', 'how', 'the', 'digital', 'revolution', 'and', 'the', 'advent', 'of', 'the', 'internet', 'have', 'allow', 'for', 'an', 'unprecedented', 'exchange', 'of', 'idea', None, 'WebMD', None, 'a', 'popular', 'internet', 'portal', 'for', 'medical', 'information', None, 'permit', 'patient', 'to', 'self', 'research', 'symptom', 'for', 'a', 'more', 'informed', 'doctor', 'visit', None, 'this', 'exercise', 'open', 'pathway', 'of', 'thinking', 'that', 'be', 'previously', 'close', 'off', 'to', 'the', 'medical', 'layman', None, 'with', 'increase', 'interdisciplinary', 'interaction', None, 'inspiration', 'can', 'arrive', 'from', 'the', 'most', 'surprising', 'corner', None, 'Jeffrey', 'Sachs', None, 'one', 'of', 'the', 'architect', 'of', 'the', 'UN', 'Millenium', 'Development', 'Goals', None, 'base', 'his', 'idea', 'on', 'emergency', 'care', 'triage', 'technique', None, 'the', 'unlikely', 'marriage', 'of', 'economic', 'and', 'medicine', 'have', 'heal', 'tense', None, 'hyperinflation', 'environment', 'from', 'South', 'America', 'to', 'Eastern', 
'Europe', None, None, 'this', 'last', 'example', 'provide', 'the', 'most', 'hope', 'in', 'how', 'technology', 'actually', 'provide', 'hope', 'to', 'the', 'future', 'of', 'humanity', None, 'by', 'increase', 'our', 'reliance', 'on', 'technology', None, 'impossible', 'goal', 'can', 'now', 'be', 'achieve', None, 'consider', 'how', 'the', 'late', '20th', 'century', 'witness', 'the', 'complete', 'elimination', 'of', 'smallpox', None, 'this', 'disease', 'have', 'ravage', 'the', 'human', 'race', 'since', 'prehistorical', 'day', None, 'and', 'yet', 'with', 'the', 'technology', 'of', 'vaccine', None, 'free', 'thinking', 'human', 'dare', 'to', 'imagine', 'a', 'world', 'free', 'of', 'smallpox', None, 'use', 'technology', None, 'battle', 'plan', 'be', 'draw', 'out', None, 'and', 'smallpox', 'be', 'systematically', 'target', 'and', 'eradicate', None, None, 'Technology', 'will', 'always', 'mark', 'the', 'human', 'experience', None, 'from', 'the', 'discovery', 'of', 'fire', 'to', 'the', 'implementation', 'of', 'nanotechnology', None, 'give', 'the', 'history', 'of', 'the', 'human', 'race', None, 'there', 'will', 'be', 'no', 'limit', 'to', 'the', 'number', 'of', 'problem', None, 'both', 'new', 'and', 'old', None, 'for', 'we', 'to', 'tackle', None, 'there', 'be', 'no', 'need', 'to', 'retreat', 'to', 'a', 'Luddite', 'attitude', 'to', 'new', 'thing', None, 'but', 'rather', 'embrace', 'a', 'hopeful', 'posture', 'to', 'the', 'possibility', 'that', 'technology', 'provide', 'for', 'new', 'avenue', 'of', 'human', 'imagination', None] - self.assertEqual(doc._.lemmas,lemmas) - - def test_word_types(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - word_types = ['19th', '20th', 'A', 'America', 'By', 'Consider', 'Development', 'During', 'Each', 'Eastern', 'Europe', 'For', 'Given', 'Goals', 'However', 'In', 'Jeffrey', 'Likewise', 'Looking', 'Luddite', 'Millenium', 'Most', 'Sachs', 'Solutions', 'South', 'Surely', 'Technology', 'The', 'There', 'This', 'UN', 'Upon', 'Using', 'WebMD', 
'With', 'a', 'ability', 'achieved', 'actually', 'additional', 'advent', 'all', 'allowed', 'allowing', 'allows', 'alternate', 'always', 'an', 'and', 'architects', 'are', 'argue', 'arrive', 'as', 'assumption', 'at', 'attempts', 'attitude', 'automobile', 'automobiles', 'avenues', 'back', 'based', 'battle', 'be', 'becomes', 'been', 'both', 'bridge', 'bridges', 'buggy', 'but', 'by', 'can', 'car', 'care', 'century', 'chances', 'changed', 'changes', 'circuits', 'closed', 'combustion', 'common', 'communication', 'commute', 'complete', 'complex', 'computer', 'concern', 'conservation', 'contrast', 'convenience', 'corners', 'could', 'create', 'created', 'creatively', 'creativity', 'dared', 'day', 'days', 'demands', 'dependence', 'dependent', 'did', 'digital', 'discovery', 'disease', 'doctor', 'does', 'dramatic', 'dramatically', 'drawn', 'earth', 'economics', 'efficiency', 'efficiently', 'elimination', 'embrace', 'emergency', 'emerging', 'employee', 'energy', 'engine', 'environments', 'eradicated', 'even', 'example', 'examples', 'exchange', 'exercise', 'exist', 'experience', 'fact', 'family', 'find', 'fire', 'for', 'free', 'frees', 'from', 'fuel', 'future', 'generation', 'global', 'goals', 'groups', 'had', 'has', 'have', 'healed', 'high', 'his', 'history', 'home', 'hope', 'hopeful', 'horse', 'how', 'human', 'humanity', 'humans', 'hyperinflation', 'hypothetical', 'ideas', 'imagination', 'imaginations', 'imagine', 'implementation', 'impossible', 'in', 'inconceivable', 'increased', 'increasing', 'information', 'informed', 'inspiration', 'interact', 'interactions', 'interdisciplinary', 'internal', 'internet', 'introduced', 'introduction', 'is', 'issues', 'itself', 'last', 'late', 'layman', 'leaving', 'limit', 'linking', 'live', 'lived', 'lives', 'making', 'mark', 'markets', 'marks', 'marriage', 'maverick', 'may', 'medical', 'medicine', 'members', 'methods', 'microns', 'minority', 'mobile', 'more', 'most', 'nanotechnology', 'nation', 'necessarily', 'need', 'negates', 'negatively', 
'networks', 'new', 'no', 'not', 'now', 'number', 'obsolete', 'occurrences', 'of', 'off', 'oil', 'old', 'on', 'one', 'only', 'opens', 'oppress', 'or', 'orbiting', 'our', 'out', 'over', 'parties', 'past', 'pathways', 'patients', 'people', 'permits', 'phone', 'plans', 'plays', 'politicians', 'popular', 'portal', 'possibilities', 'posture', 'preclude', 'prehistorical', 'previous', 'previously', 'prior', 'problems', 'processes', 'processing', 'proliferation', 'provides', 'quandaries', 'quick', 'race', 'rather', 'ravaged', 'reached', 'recent', 'reduction', 'reflection', 'release', 'reliance', 'require', 'research', 'retreat', 'reveal', 'reveals', 'revolution', 'revolutionized', 'ruling', 'runs', 'satellites', 'scale', 'scientists', 'see', 'self', 'short', 'silicon', 'since', 'smallpox', 'solve', 'species', 'statement', 'states', 'such', 'surprising', 'symptoms', 'systematically', 'tackle', 'targeted', 'taxation', 'techniques', 'technology', 'tense', 'that', 'the', 'themselves', 'there', 'these', 'things', 'think', 'thinking', 'this', 'through', 'time', 'to', 'transport', 'triage', 'turn', 'typical', 'unfettered', 'unlikely', 'unprecedented', 'us', 'utilize', 'vaccines', 'visit', 'warming', 'was', 'we', 'were', 'where', 'wide', 'will', 'wireless', 'with', 'without', 'witnessed', 'women', 'work', 'workday', 'worker', 'world', 'would', 'yet'] - self.assertEqual(doc._.word_types,word_types) - - def test_morphroot(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - morphroot = ['the', 'state', 'link', 'technology', 'negative', 'with', 'free', 'think', 'play', 'on', 'recent', 'human', 'experience', 'over', 'the', 'past', 'century', None, 'sure', 'there', 'have', 'be', 'no', 'time', 'in', 'history', 'where', 'the', 'live', 'live', 'of', 'people', 'have', 'change', 'more', 'drama', None, 'a', 'quick', 'reflect', 'on', 'a', 'typical', 'day', 'reveal', 'how', 'technology', 'have', 'revolution', 'the', 'world', None, 'most', 'people', 'commute', 'to', 'work', 'in', 
'a', 'automobile', 'that', 'run', 'on', 'a', 'internal', 'combustion', 'engine', None, 'during', 'the', 'workday', None, 'chance', 'be', 'high', 'that', 'the', 'employ', 'will', 'interact', 'with', 'a', 'computer', 'that', 'process', 'inform', 'on', 'silicon', 'bridge', 'that', 'be', None, 'micron', 'wide', None, 'upon', 'leave', 'home', None, 'family', 'member', 'will', 'be', 'reach', 'through', 'wireless', 'network', 'that', 'utilise', 'satellite', 'orbit', 'the', 'earth', None, 'each', 'of', 'this', 'common', 'occur', 'could', 'have', 'be', 'conceive', 'at', 'the', 'turn', 'of', 'the', '19th', 'century', None, None, 'the', 'state', 'attempt', 'to', 'bridge', 'this', 'drama', 'change', 'to', 'a', 'reduce', 'in', 'the', 'able', 'for', 'human', 'to', 'think', 'for', 'they', None, 'the', 'assumption', 'be', 'that', 'a', 'increase', 'rely', 'on', 'technology', 'negate', 'the', 'need', 'for', 'people', 'to', 'think', 'create', 'to', 'solve', 'previous', 'quandary', None, 'look', 'back', 'at', 'the', 'introduce', None, 'one', 'could', 'argue', 'that', 'without', 'a', 'car', None, 'computer', None, 'or', 'mobile', 'telephone', None, 'the', 'hypothetical', 'work', 'would', 'need', 'to', 'find', 'alternate', 'method', 'of', 'transport', None, 'inform', 'process', 'and', 'communicate', None, 'technology', 'short', 'circuit', 'this', 'think', 'by', 'make', 'the', 'problem', 'obsolete', None, None, 'however', None, 'this', 'rely', 'on', 'technology', 'do', 'not', 'necessary', 'preclude', 'the', 'create', 'that', 'mark', 'the', 'human', 'species', None, 'the', 'prior', 'example', 'reveal', 'that', 'technology', 'allow', 'for', 'convenience', None, 'the', 'car', None, 'computer', 'and', 'telephone', 'all', 'release', 'add', 'time', 'for', 'people', 'to', 'live', 'more', 'efficient', None, 'this', 'efficient', 'do', 'not', 'preclude', 'the', 'need', 'for', 'human', 'to', 'think', 'for', 'they', None, 'in', 'fact', None, 'technology', 'free', 'human', 'to', 'not', 'only', 
'tackle', 'new', 'problem', None, 'but', 'may', 'it', 'create', 'new', 'issue', 'that', 'do', 'not', 'exist', 'without', 'technology', None, 'for', 'example', None, 'the', 'proliferate', 'of', 'automobile', 'have', 'introduce', 'a', 'need', 'for', 'fuel', 'conserve', 'on', 'a', 'global', 'scale', None, 'with', 'increase', 'energy', 'demand', 'from', 'emerge', 'market', None, 'global', 'warm', 'become', 'a', 'concern', 'conceive', 'to', 'the', 'horse', None, 'and', None, 'buggy', 'generation', None, 'likewise', 'dependence', 'on', 'oil', 'have', 'create', 'nation', None, 'states', 'that', 'be', 'not', 'depend', 'on', 'tax', None, 'allow', 'rule', 'party', 'to', 'oppress', 'minor', 'group', 'such', 'as', 'woman', None, 'solve', 'to', 'this', 'complex', 'problem', 'require', 'the', 'fetter', 'imagine', 'of', 'maverick', 'science', 'and', 'politics', None, None, 'in', 'contrast', 'to', 'the', 'state', None, 'we', 'can', 'even', 'see', 'how', 'technology', 'free', 'the', 'human', 'imagine', None, 'consider', 'how', 'the', 'digital', 'revolution', 'and', 'the', 'advent', 'of', 'the', 'internet', 'have', 'allow', 'for', 'a', 'precede', 'exchange', 'of', 'idea', None, 'WebMD', None, 'a', 'popular', 'internet', 'portal', 'for', 'medical', 'inform', None, 'permit', 'patients', 'to', 'self', 'research', 'symptom', 'for', 'a', 'more', 'inform', 'doctor', 'visit', None, 'this', 'exercise', 'open', 'pathway', 'of', 'think', 'that', 'be', 'previous', 'closed', 'off', 'to', 'the', 'medical', 'layman', None, 'with', 'increase', 'discipline', 'interact', None, 'inspire', 'can', 'arrive', 'from', 'the', 'most', 'surprise', 'corner', None, 'Jeffrey', 'Sachs', None, 'one', 'of', 'the', 'architect', 'of', 'the', 'un', 'millennium', 'develop', 'goal', None, 'base', 'he', 'idea', 'on', 'emergency', 'care', 'triage', 'technique', None, 'the', 'likely', 'marry', 'of', 'economy', 'and', 'medicine', 'have', 'heal', 'tense', None, 'inflate', 'environment', 'from', 'south', 'America', 'to', 
'eastern', 'Europe', None, None, 'this', 'last', 'example', 'provide', 'the', 'most', 'hope', 'in', 'how', 'technology', 'actual', 'provide', 'hope', 'to', 'the', 'future', 'of', 'human', None, 'by', 'increase', 'we', 'rely', 'on', 'technology', None, 'possible', 'goal', 'can', 'now', 'be', 'achieve', None, 'consider', 'how', 'the', 'late', '20th', 'century', 'witness', 'the', 'complete', 'eliminate', 'of', 'smallpox', None, 'this', 'disease', 'have', 'ravage', 'the', 'human', 'race', 'since', 'prehistorical', 'day', None, 'and', 'yet', 'with', 'the', 'technology', 'of', 'vaccine', None, 'free', 'think', 'human', 'dare', 'to', 'imagine', 'a', 'world', 'free', 'of', 'smallpox', None, 'use', 'technology', None, 'battle', 'plan', 'be', 'draw', 'out', None, 'and', 'smallpox', 'be', 'system', 'target', 'and', 'eradicate', None, None, 'technology', 'will', 'always', 'mark', 'the', 'human', 'experience', None, 'from', 'the', 'discover', 'of', 'fire', 'to', 'the', 'implement', 'of', 'nanotechnology', None, 'give', 'the', 'history', 'of', 'the', 'human', 'race', None, 'there', 'will', 'be', 'no', 'limit', 'to', 'the', 'number', 'of', 'problem', None, 'both', 'new', 'and', 'old', None, 'for', 'we', 'to', 'tackle', None, 'there', 'be', 'no', 'need', 'to', 'retreat', 'to', 'a', 'luddite', 'attitude', 'to', 'new', 'thing', None, 'but', 'rather', 'embrace', 'a', 'hope', 'posture', 'to', 'the', 'possible', 'that', 'technology', 'provide', 'for', 'new', 'avenue', 'of', 'human', 'imagine', None] - self.assertEqual(doc._.morphroot,morphroot) - - def test_wf_type_count(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.wf_type_count,224) - - def test_lemma_type_count(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.lemma_type_count,252) - - - def test_lemma_type_count(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.type_count,265) - - def test_lemma_tokene_count(self): 
- doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.token_count,345) - - def test_syllable_counts(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - syllable_counts = [1, 2, 2, 4, 4, 1, 1, 2, 1, 1, 2, 2, 4, 2, 1, 1, 3, None, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 2, 1, 2, 1, 5, None, 1, 1, 3, 1, 1, 3, 1, 2, 1, 4, 1, 5, 1, 1, None, 1, 2, 2, 1, 1, 1, 1, 4, 1, 1, 1, 1, 3, 3, 2, None, 2, 1, 2, None, 2, 1, 1, 1, 1, 3, 1, 3, 1, 1, 3, 1, 3, 4, 1, 3, 2, 1, 1, None, 2, 1, None, 2, 2, 1, None, 3, 2, 1, 1, 2, 1, 2, 2, 1, 3, 4, 3, 1, 1, None, 1, 1, 1, 2, 4, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 3, None, None, 1, 2, 2, 1, 1, 1, 3, 2, 1, 1, 3, 1, 1, 4, 1, 2, 1, 1, 1, 2, None, 1, 3, 1, 1, 1, 2, 3, 1, 4, 3, 1, 1, 1, 2, 1, 1, 4, 1, 1, 3, 3, None, 2, 1, 1, 1, 4, None, 1, 1, 2, 1, 2, 1, 1, None, 3, None, 1, 2, 1, None, 1, 5, 2, 1, 1, 1, 1, 3, 2, 1, 2, None, 4, 3, 1, 5, None, 4, 1, 2, 1, 2, 1, 2, 1, 2, 3, None, None, 3, None, 1, 3, 1, 4, 1, 1, 5, 2, 1, 5, 1, 1, 1, 2, 2, None, 1, 2, 3, 2, 1, 4, 2, 1, 4, None, 1, 1, None, 3, 1, 1, 1, 2, 4, 1, 1, 2, 1, 1, 1, 4, None, 1, 4, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, None, 1, 1, None, 4, 1, 4, 1, 1, 2, 2, 1, 2, None, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 4, None, 1, 3, None, 1, 5, 1, 5, 1, 3, 1, 1, 1, 2, 4, 1, 1, 2, 1, None, 1, 3, 3, 2, 1, 3, 2, None, 2, 2, 3, 1, 2, 5, 1, 1, 1, None, 1, None, 2, 4, None, 2, 3, 1, 1, 1, 3, 2, None, 1, 1, 1, 1, 3, 1, 3, None, 3, 2, 2, 1, 2, 4, 1, 1, 1, 2, None, 3, 1, 1, 2, 2, 2, 1, 4, 5, 1, 3, 2, 1, 4, None, None, 1, 2, 1, 1, 2, None, 1, 1, 2, 1, 1, 4, 1, 1, 2, 5, None, 3, 1, 1, 3, 4, 1, 1, 2, 1, 1, 3, 1, 2, 1, 1, 5, 2, 1, 2, None, 1, None, 1, 3, 3, 2, 1, 3, 4, None, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, None, 1, 3, 2, 2, 1, 2, 1, 1, 3, 1, 1, 1, 1, 3, 2, None, 1, 2, 7, 4, None, 4, 1, 2, 1, 1, 1, 3, 2, None, 2, 1, None, 1, 1, 1, 3, 1, 1, 1, 3, 4, 1, None, 2, 1, 2, 1, 4, 1, 2, 3, None, 1, 3, 2, 1, 4, 1, 3, 1, 2, 1, None, 5, 4, 1, 1, 4, 1, 2, 2, None, None, 1, 1, 3, 3, 1, 1, 1, 1, 1, 4, 4, 3, 
1, 1, 1, 2, 1, 4, None, 1, 3, 1, 3, 1, 4, None, 4, 1, 1, 1, 1, 2, None, 3, 1, 1, 1, 1, 3, 3, 1, 2, 5, 1, 2, None, 1, 2, 1, 2, 1, 2, 1, 1, 5, 1, None, 1, 1, 1, 1, 4, 1, 3, None, 1, 2, 2, 2, 1, 3, 1, 1, 1, 1, 2, None, 2, 4, None, 2, 1, 1, 1, 1, None, 1, 2, 1, 6, 3, 1, 5, None, None, 4, 1, 2, 1, 1, 2, 4, None, 1, 1, 4, 1, 1, 1, 1, 5, 1, 6, None, 2, 1, 3, 1, 1, 2, 1, None, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, None, 1, 1, 1, 1, None, 1, 1, 1, 2, None, 1, 1, 1, 1, 1, 2, 1, 1, 2, 3, 1, 1, 1, None, 1, 2, 2, 1, 2, 2, 1, 1, 5, 1, 4, 3, 1, 1, 3, 1, 2, 5, None] - nSyllables = doc._.nSyllables - self.assertEqual(syllable_counts,nSyllables) - - def test_mean_nSylls(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - nSylls = 2.4840579710144928 - self.assertEqual(doc._.mean_nSyll,nSylls) - - def test_med_nSylls(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_nSyll,2.0) - - def test_max_nSylls(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_nSyll,7.0) - - def test_min_nSylls(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_nSyll,1.0) - - def test_std_nSylls(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_nSyll,1.2033894945940653) - - def test_sqrtNChars(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sqrtNChars = [1.7320508075688772, 3.0, 2.6457513110645907, 3.1622776601683795, 3.1622776601683795, 2.0, 2.0, 2.8284271247461903, 2.23606797749979, 1.4142135623730951, 2.449489742783178, 2.23606797749979, 3.1622776601683795, 2.0, 1.7320508075688772, 2.0, 2.6457513110645907, 1.0, 2.449489742783178, 2.23606797749979, 1.7320508075688772, 2.0, 1.4142135623730951, 2.0, 1.4142135623730951, 2.6457513110645907, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 2.23606797749979, 1.4142135623730951, 2.449489742783178, 2.0, 2.6457513110645907, 2.0, 3.4641016151377544, 1.0, 1.0, 2.23606797749979, 
3.1622776601683795, 1.4142135623730951, 1.0, 2.6457513110645907, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 3.1622776601683795, 1.7320508075688772, 3.7416573867739413, 1.7320508075688772, 2.23606797749979, 1.0, 2.0, 2.449489742783178, 2.6457513110645907, 1.4142135623730951, 2.0, 1.4142135623730951, 1.4142135623730951, 3.1622776601683795, 2.0, 2.0, 1.4142135623730951, 1.4142135623730951, 2.8284271247461903, 3.1622776601683795, 2.449489742783178, 1.0, 2.449489742783178, 1.7320508075688772, 2.6457513110645907, 1.0, 2.6457513110645907, 1.7320508075688772, 2.0, 2.0, 1.7320508075688772, 2.8284271247461903, 2.0, 2.8284271247461903, 2.0, 1.0, 2.8284271247461903, 2.0, 3.0, 3.3166247903554, 1.4142135623730951, 2.6457513110645907, 2.6457513110645907, 2.0, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 2.0, 1.0, 2.0, 2.6457513110645907, 2.0, 1.0, 2.449489742783178, 2.6457513110645907, 2.0, 1.4142135623730951, 2.6457513110645907, 2.6457513110645907, 2.8284271247461903, 2.8284271247461903, 2.0, 2.6457513110645907, 3.1622776601683795, 2.8284271247461903, 1.7320508075688772, 2.23606797749979, 1.0, 2.0, 1.4142135623730951, 2.23606797749979, 2.449489742783178, 3.3166247903554, 2.23606797749979, 2.0, 2.0, 3.605551275463989, 1.4142135623730951, 1.7320508075688772, 2.0, 1.4142135623730951, 1.7320508075688772, 2.0, 2.6457513110645907, 1.0, 1.4142135623730951, 1.7320508075688772, 3.0, 2.8284271247461903, 1.4142135623730951, 2.449489742783178, 2.23606797749979, 2.8284271247461903, 2.6457513110645907, 1.4142135623730951, 1.0, 3.0, 1.4142135623730951, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.23606797749979, 1.7320508075688772, 3.1622776601683795, 1.0, 1.7320508075688772, 3.1622776601683795, 1.4142135623730951, 2.0, 1.4142135623730951, 3.0, 2.8284271247461903, 1.4142135623730951, 3.1622776601683795, 2.6457513110645907, 1.7320508075688772, 2.0, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 
2.23606797749979, 3.1622776601683795, 1.4142135623730951, 2.23606797749979, 2.8284271247461903, 3.1622776601683795, 1.0, 2.6457513110645907, 2.0, 1.4142135623730951, 1.7320508075688772, 3.4641016151377544, 1.0, 1.7320508075688772, 2.23606797749979, 2.23606797749979, 2.0, 2.6457513110645907, 1.0, 1.7320508075688772, 1.0, 2.8284271247461903, 1.0, 1.4142135623730951, 2.449489742783178, 2.23606797749979, 1.0, 1.7320508075688772, 3.4641016151377544, 2.449489742783178, 2.23606797749979, 2.0, 1.4142135623730951, 2.0, 3.0, 2.6457513110645907, 1.4142135623730951, 3.0, 1.0, 3.3166247903554, 3.1622776601683795, 1.7320508075688772, 3.605551275463989, 1.0, 3.1622776601683795, 2.23606797749979, 2.8284271247461903, 2.0, 2.8284271247461903, 1.4142135623730951, 2.449489742783178, 1.7320508075688772, 2.8284271247461903, 2.8284271247461903, 1.0, 1.4142135623730951, 2.6457513110645907, 1.0, 2.0, 2.8284271247461903, 1.4142135623730951, 3.1622776601683795, 2.0, 1.7320508075688772, 3.3166247903554, 2.8284271247461903, 1.7320508075688772, 3.1622776601683795, 2.0, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 2.6457513110645907, 1.0, 1.7320508075688772, 2.23606797749979, 2.8284271247461903, 2.449489742783178, 2.0, 3.1622776601683795, 2.449489742783178, 1.7320508075688772, 3.3166247903554, 1.0, 1.7320508075688772, 1.7320508075688772, 1.0, 2.8284271247461903, 1.7320508075688772, 2.23606797749979, 1.7320508075688772, 2.6457513110645907, 3.1622776601683795, 2.0, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.0, 2.0, 3.3166247903554, 1.0, 2.0, 3.1622776601683795, 2.0, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 2.0, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.23606797749979, 1.7320508075688772, 3.1622776601683795, 1.0, 1.4142135623730951, 2.0, 1.0, 3.1622776601683795, 2.23606797749979, 2.8284271247461903, 1.4142135623730951, 1.7320508075688772, 2.0, 2.449489742783178, 1.7320508075688772, 2.8284271247461903, 1.0, 1.7320508075688772, 
1.7320508075688772, 2.449489742783178, 2.449489742783178, 1.7320508075688772, 2.449489742783178, 2.0, 1.7320508075688772, 1.7320508075688772, 2.23606797749979, 2.6457513110645907, 3.1622776601683795, 1.0, 1.7320508075688772, 2.6457513110645907, 1.0, 1.7320508075688772, 3.605551275463989, 1.4142135623730951, 3.3166247903554, 1.7320508075688772, 3.1622776601683795, 1.0, 2.0, 1.7320508075688772, 2.0, 3.4641016151377544, 1.4142135623730951, 1.0, 2.449489742783178, 2.23606797749979, 1.0, 2.0, 3.1622776601683795, 2.449489742783178, 2.6457513110645907, 2.0, 2.8284271247461903, 2.6457513110645907, 1.0, 2.449489742783178, 2.6457513110645907, 2.6457513110645907, 1.0, 2.6457513110645907, 3.605551275463989, 1.4142135623730951, 1.7320508075688772, 2.23606797749979, 1.0, 1.7320508075688772, 1.0, 2.23606797749979, 3.1622776601683795, 1.0, 2.8284271247461903, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 2.449489742783178, 1.0, 2.449489742783178, 2.0, 1.7320508075688772, 1.7320508075688772, 3.0, 1.4142135623730951, 2.8284271247461903, 1.0, 2.8284271247461903, 2.449489742783178, 2.6457513110645907, 1.4142135623730951, 2.6457513110645907, 2.8284271247461903, 2.449489742783178, 2.0, 1.4142135623730951, 2.23606797749979, 1.0, 3.0, 1.4142135623730951, 2.23606797749979, 2.6457513110645907, 2.8284271247461903, 2.6457513110645907, 1.7320508075688772, 3.1622776601683795, 3.4641016151377544, 1.4142135623730951, 2.8284271247461903, 3.1622776601683795, 1.7320508075688772, 3.3166247903554, 1.0, 1.4142135623730951, 1.4142135623730951, 2.8284271247461903, 1.4142135623730951, 1.7320508075688772, 3.0, 1.0, 1.4142135623730951, 1.7320508075688772, 2.0, 1.7320508075688772, 1.7320508075688772, 3.1622776601683795, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 3.3166247903554, 1.0, 2.8284271247461903, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 3.1622776601683795, 1.7320508075688772, 1.7320508075688772, 2.449489742783178, 
1.4142135623730951, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 1.4142135623730951, 3.605551275463989, 2.8284271247461903, 1.4142135623730951, 2.23606797749979, 1.0, 2.23606797749979, 1.0, 1.0, 2.6457513110645907, 2.8284271247461903, 2.449489742783178, 1.7320508075688772, 2.6457513110645907, 3.3166247903554, 1.0, 2.6457513110645907, 2.8284271247461903, 1.4142135623730951, 2.0, 2.8284271247461903, 2.8284271247461903, 1.7320508075688772, 1.0, 2.0, 2.8284271247461903, 2.449489742783178, 2.23606797749979, 1.0, 2.0, 2.8284271247461903, 2.23606797749979, 2.8284271247461903, 1.4142135623730951, 2.8284271247461903, 2.0, 2.0, 3.1622776601683795, 2.449489742783178, 1.7320508075688772, 1.4142135623730951, 1.7320508075688772, 2.6457513110645907, 2.449489742783178, 1.0, 2.0, 3.0, 4.123105625617661, 3.4641016151377544, 1.0, 3.3166247903554, 1.7320508075688772, 2.449489742783178, 2.0, 1.7320508075688772, 2.0, 3.1622776601683795, 2.6457513110645907, 1.0, 2.6457513110645907, 2.23606797749979, 1.0, 1.7320508075688772, 1.4142135623730951, 1.7320508075688772, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772, 1.4142135623730951, 3.0, 3.3166247903554, 2.23606797749979, 1.0, 2.23606797749979, 1.7320508075688772, 2.23606797749979, 1.4142135623730951, 3.0, 2.0, 2.449489742783178, 3.1622776601683795, 1.0, 1.7320508075688772, 2.8284271247461903, 2.8284271247461903, 1.4142135623730951, 3.0, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 2.449489742783178, 2.23606797749979, 1.0, 3.7416573867739413, 3.4641016151377544, 2.0, 2.23606797749979, 2.6457513110645907, 1.4142135623730951, 2.6457513110645907, 2.449489742783178, 1.0, 1.4142135623730951, 2.0, 2.0, 2.6457513110645907, 2.8284271247461903, 1.7320508075688772, 2.0, 2.0, 1.4142135623730951, 1.7320508075688772, 3.1622776601683795, 2.8284271247461903, 2.8284271247461903, 2.0, 1.4142135623730951, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 
2.8284271247461903, 1.0, 1.4142135623730951, 3.1622776601683795, 1.7320508075688772, 2.8284271247461903, 1.4142135623730951, 3.1622776601683795, 1.0, 3.1622776601683795, 2.23606797749979, 1.7320508075688772, 1.7320508075688772, 1.4142135623730951, 2.8284271247461903, 1.0, 2.8284271247461903, 1.7320508075688772, 1.7320508075688772, 2.0, 2.0, 2.6457513110645907, 3.0, 1.7320508075688772, 2.8284271247461903, 3.3166247903554, 1.4142135623730951, 2.8284271247461903, 1.0, 2.0, 2.6457513110645907, 1.7320508075688772, 2.6457513110645907, 1.7320508075688772, 2.23606797749979, 2.0, 2.23606797749979, 3.605551275463989, 2.0, 1.0, 1.7320508075688772, 1.7320508075688772, 2.0, 1.7320508075688772, 3.1622776601683795, 1.4142135623730951, 2.8284271247461903, 1.0, 2.0, 2.8284271247461903, 2.449489742783178, 2.23606797749979, 1.4142135623730951, 2.6457513110645907, 1.0, 2.23606797749979, 2.0, 1.4142135623730951, 2.8284271247461903, 1.0, 2.23606797749979, 3.1622776601683795, 1.0, 2.449489742783178, 2.23606797749979, 2.0, 2.23606797749979, 1.7320508075688772, 1.0, 1.7320508075688772, 2.8284271247461903, 1.7320508075688772, 3.7416573867739413, 2.8284271247461903, 1.7320508075688772, 3.1622776601683795, 1.0, 1.4142135623730951, 3.1622776601683795, 2.0, 2.449489742783178, 2.0, 1.7320508075688772, 2.23606797749979, 3.1622776601683795, 1.0, 2.0, 1.7320508075688772, 3.0, 1.4142135623730951, 2.0, 1.4142135623730951, 1.7320508075688772, 3.7416573867739413, 1.4142135623730951, 3.7416573867739413, 1.0, 2.23606797749979, 1.7320508075688772, 2.6457513110645907, 1.4142135623730951, 1.7320508075688772, 2.23606797749979, 2.0, 1.0, 2.23606797749979, 2.0, 1.4142135623730951, 1.4142135623730951, 2.23606797749979, 1.4142135623730951, 1.7320508075688772, 2.449489742783178, 1.4142135623730951, 2.8284271247461903, 1.0, 2.0, 1.7320508075688772, 1.7320508075688772, 1.7320508075688772, 1.0, 1.7320508075688772, 1.4142135623730951, 1.4142135623730951, 2.449489742783178, 1.0, 2.23606797749979, 1.4142135623730951, 
1.4142135623730951, 2.0, 1.4142135623730951, 2.6457513110645907, 1.4142135623730951, 1.0, 2.6457513110645907, 2.8284271247461903, 1.4142135623730951, 1.7320508075688772, 2.449489742783178, 1.0, 1.7320508075688772, 2.449489742783178, 2.6457513110645907, 1.0, 2.6457513110645907, 2.6457513110645907, 1.4142135623730951, 1.7320508075688772, 3.605551275463989, 2.0, 3.1622776601683795, 2.8284271247461903, 1.7320508075688772, 1.7320508075688772, 2.6457513110645907, 1.4142135623730951, 2.23606797749979, 3.3166247903554, 1.0] - self.assertEqual(doc._.sqrtNChars,sqrtNChars) - - def test_mean_sqrtNChars(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sqrtNChars = 2.671858311020255 - self.assertEqual(doc._.mean_sqnChars,sqrtNChars) - - def test_med_sqrtNChars(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_sqnChars,2.6457513110645907) - - def test_max_sqrtNChars(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sqnChars,4.123105625617661) - - def test_min_sqrtNChars(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sqnChars,1.4142135623730951) - - def test_std_sqrtNChars(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_sqnChars,0.4641344226022158) - - def test_latinates(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - latinates = [None, 1, 0, 0, 1, None, 0, 0, 0, None, 0, 0, 0, None, None, 0, 0, None, 0, None, None, None, None, 0, None, 0, None, None, 0, 0, None, 0, None, 0, None, 1, None, None, 0, 1, None, None, 1, 0, 0, None, 0, None, 1, None, 0, None, None, 0, 1, None, 0, None, None, 1, None, 0, None, None, 1, 1, 0, None, None, None, 0, None, 0, None, 0, None, None, 1, None, 1, None, None, 0, None, 1, 1, None, 0, 0, None, None, None, 0, 0, None, 0, 0, 0, None, 0, 0, None, None, 0, None, 0, 0, None, 1, 0, 0, None, 0, None, None, None, None, 0, 1, 0, None, None, 1, None, None, 
0, None, None, None, 0, None, None, None, 1, 0, None, 0, None, 1, 0, None, None, 1, None, None, 1, None, 0, None, 0, None, None, None, None, 0, None, None, None, 1, 1, None, 0, 1, None, 0, None, 0, None, 0, 1, None, 0, 0, 0, None, 0, 0, None, None, 1, None, 0, 0, 0, None, 0, None, 0, None, 0, None, None, 0, 0, None, None, 1, 0, 0, 0, None, 0, 1, 0, None, 1, None, 1, 1, None, 1, None, 0, 0, 0, None, 0, None, 0, None, 0, 0, None, None, 0, None, None, 1, None, 0, None, None, 1, 1, None, 1, None, 0, None, 0, 0, None, None, 0, 0, 0, None, 0, 0, None, 0, None, None, 0, None, 0, None, 0, None, 0, 1, 0, None, 0, None, 0, None, 0, None, None, 0, None, None, 1, None, 0, None, 0, None, 0, None, None, None, None, 0, None, 0, 0, 1, None, None, None, 0, 0, 0, None, None, 0, None, 0, 0, 0, None, None, None, 0, 0, 0, None, None, 0, None, None, 1, None, 1, None, 1, None, 0, None, 0, 1, None, None, 1, 0, None, None, 1, 0, 1, None, 1, 0, None, 1, 0, 0, None, 0, 1, None, None, 0, None, None, None, 0, 1, None, 0, 1, None, 0, None, 0, 0, None, 0, None, None, None, 1, None, 1, None, 0, 0, 0, None, 0, 1, 0, None, None, 0, None, 1, None, None, 1, 0, 1, None, 0, 1, None, 0, 1, None, 1, None, None, None, 0, None, None, 1, None, None, None, 0, 0, None, 0, 0, None, 0, 1, None, 0, None, None, 1, 1, None, None, 0, None, None, None, None, 0, None, None, 0, 1, None, 0, None, None, None, None, 1, None, 0, None, 1, 1, None, 0, 0, None, 0, 1, 0, None, None, None, 0, 0, 0, None, None, 0, 0, 0, None, 0, None, None, 0, 0, None, None, None, 1, 0, None, None, 1, 1, 1, None, 1, None, 0, None, None, None, 0, 0, None, None, None, None, 0, None, None, 0, None, None, None, 1, 1, 0, None, 0, None, 0, None, 0, 0, None, 0, None, None, 0, 0, None, 1, None, 0, None, 0, 0, None, 1, 1, None, 0, 0, None, 0, 0, None, None, None, 0, 0, 1, None, None, 0, None, None, 0, 1, 1, 0, None, None, 0, None, 1, None, None, 1, None, 1, None, 0, None, 1, 0, None, None, None, 0, None, 0, None, None, 0, None, 0, 0, None, 0, 1, None, 
0, None, None, 0, None, 0, None, 0, 0, 0, 1, 0, None, None, 0, None, None, 0, None, 0, None, 0, 0, 0, 0, None, 0, None, 0, 0, None, 0, None, 0, 0, None, 0, 0, None, 0, None, None, None, 0, None, 1, 0, None, 1, None, None, 0, None, 0, 0, None, 0, 0, None, None, None, 0, None, 0, None, None, 1, None, None, None, 0, None, 0, None, None, 0, 0, None, None, None, None, None, 0, None, None, 0, None, 0, None, None, 0, None, 0, None, None, None, None, 0, None, None, None, None, 0, None, 0, None, None, 1, 0, None, 0, 0, None, None, 0, 0, None, 0, 0, None, None, 1, None, 0, 1, None, 0, 0, None, 0, 1, None] - - self.assertEqual(doc._.latinates,latinates) - - def test_propn_latinate(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_latinate,0.2835820895522388) - - def test_academics(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - academic = [0, 1, 1, 1, 1, 0, 0, 0, 0, None, 1, 0, 1, 0, 0, 0, 0, None, 0, 0, 0, 0, None, 0, None, 0, 0, 0, 0, 0, None, 0, 0, 0, 0, 1, None, None, 0, 1, None, None, 1, 0, 1, 0, 1, 0, 1, 0, 0, None, 0, 0, 0, None, 0, None, None, 0, 0, 0, None, None, 1, 0, 0, None, 0, 0, 0, None, 0, 0, 0, 0, 0, 1, 0, 1, 0, None, 1, 0, 1, 1, None, 0, 0, 0, 0, None, 0, 0, None, 0, 0, 0, None, 0, 1, 0, None, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, None, 0, None, 0, 0, 1, 0, 0, 0, 1, None, 0, 0, None, 0, 0, 0, None, None, 0, 1, 1, None, 0, 0, 1, 0, None, None, 1, None, 0, 1, 0, 0, None, 0, 0, 0, None, 0, 1, None, 0, None, 1, 1, None, 1, 1, 0, 0, 0, 0, None, 0, 1, None, 1, 1, 0, None, 0, 0, None, 0, 1, None, 0, 0, 0, 0, 0, None, 0, None, 1, None, None, 1, 0, None, 0, 1, 0, 0, 0, None, 0, 1, 1, None, 1, None, 1, 1, 0, 1, None, 1, 0, 1, 0, 0, None, 0, 0, 1, 1, None, None, 1, None, 0, 1, None, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, None, 0, 1, 1, 1, 0, 1, 0, 0, 0, None, 0, 0, None, 1, 0, 0, 0, 1, 1, 0, 0, 0, None, 0, 0, 1, None, 0, 1, 0, 0, 0, 0, 0, 0, 0, None, 0, 0, 0, None, None, 0, None, 1, 0, 0, None, 0, 0, 0, 0, 1, None, 0, 0, 
0, 1, 0, 1, 0, 0, 0, 1, 0, 1, None, 0, 1, None, 0, 0, None, 0, 0, 1, None, 0, 0, 1, 1, None, None, 1, 1, None, 0, 1, 1, 0, 0, 1, 0, None, 1, 0, 0, None, 1, 1, None, 0, 0, None, 0, None, 0, 1, None, 1, 1, None, 0, 0, 1, 0, None, 1, 0, 0, 0, 1, None, 0, None, 0, 0, 0, None, 0, 1, 0, 0, None, 0, None, 1, None, 0, 1, 1, 1, 0, 0, 0, None, 0, 0, 0, 0, None, None, None, 1, None, 0, 1, None, None, 0, 0, 0, 0, 1, 0, 0, 0, 0, None, 1, 0, 0, 0, 1, 0, 0, 0, None, 0, 0, 0, 0, 0, None, 1, 0, None, 1, None, 0, None, None, 0, 0, 0, 0, 1, 1, None, 1, 0, None, 0, 1, 1, 0, None, 0, 0, 0, 0, None, 0, 0, 0, 0, None, 0, 0, 0, 1, 0, 0, None, 0, 1, 0, None, 0, 1, 0, 1, None, 0, 0, 0, 0, 0, 0, 1, 0, None, 0, 0, None, 0, None, 0, 1, None, 0, None, 0, 1, 1, None, 0, 0, 1, None, 1, 0, 0, 1, None, 0, 1, 0, None, 1, 0, 0, 0, 0, 1, None, 0, 1, 0, 0, 0, None, 0, 0, None, None, 0, 0, 1, 1, 0, 0, 0, None, 0, 1, 1, 1, 0, None, 0, 1, None, 0, None, None, 1, 0, 1, None, 1, None, 1, 1, 0, 0, None, 1, None, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, None, 0, None, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, None, 0, 0, 0, 0, 1, None, 0, None, 0, 0, 0, 0, None, 0, None, 0, 0, None, 0, None, 0, 1, None, 0, 0, 0, 0, 0, None, 0, 0, 0, 1, 1, 0, 0, None, None, 1, 0, 0, 0, 0, 0, 1, None, 0, 0, 1, None, 0, None, 0, 1, None, 0, None, 0, 0, 0, None, 0, 0, 0, None, 0, 0, None, None, 1, None, 0, 0, None, 1, None, 0, 0, 0, 0, None, 0, None, None, 0, None, 0, None, None, 0, None, 0, None, None, 0, 1, None, 0, 0, None, 0, 0, 1, None, 0, 0, None, 0, 1, 0, 1, 1, 0, 0, 0, None, 0, 0, None] - self.assertEqual(doc._.academics,academic) - - - def test_propn_academic(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_academic,0.4418604651162791) - - def test_family_sizes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - family_sizes = [1, 7, 13, 7, 5, 2, 11, 22, 16, 2, 2, 13, 8, 2, 1, 2, 2, None, 6, 1, 13, 24, 2, 15, 7, 13, 2, 1, 5, 5, 1, 4, 13, 11, 1, 20, None, 2, 11, 11, 2, 2, 4, 7, 8, 2, 7, 
13, 14, 1, 6, None, 2, 4, 6, 1, 15, 7, 2, 4, 3, 14, 2, 2, 12, 2, 3, None, 1, 1, 2, None, 5, 24, 5, 3, 1, 15, 9, 13, 2, 2, 21, 3, 14, 18, 2, 2, 5, 3, 24, None, 2, 11, None, 1, 5, 8, None, 5, 4, 9, 24, 4, 4, 2, 6, 3, 14, 2, 9, 1, 8, None, 1, 1, 2, 8, 10, 2, 13, 24, 8, 1, 1, 6, 1, 1, None, 2, None, None, 1, 7, 5, 1, 5, 2, 20, 11, 1, 2, 10, 7, 1, 8, 2, 13, 1, 22, 2, 8, None, 1, 2, 24, 3, 2, 5, 12, 2, 7, 6, 1, 9, 2, 4, 1, 22, 15, 1, 9, 2, 3, None, 7, 10, 1, 1, 15, None, 5, 2, 14, 3, 1, 2, 2, None, 21, None, 2, 6, 10, None, 1, 2, 15, 2, 9, 1, 8, 9, 8, 1, 8, None, 18, 14, 2, 11, None, 7, 11, 7, 2, 22, 2, 13, 1, 6, 1, None, None, 1, None, 2, 12, 2, 7, 22, 4, 5, 4, 1, 15, 3, 9, 1, 13, 5, None, 1, 2, 2, 8, 3, 7, 7, 2, 6, None, 1, 2, None, 21, 2, 10, 1, 8, 11, 15, 2, 4, 1, 5, 1, 8, None, 2, 8, 22, 4, 4, 1, 9, 2, 13, 1, 22, 2, 8, None, 7, 3, None, 7, 11, 13, 1, 4, 1, 4, 14, 6, None, 2, 1, 3, 15, 14, 7, 3, 22, 4, 8, 1, 7, None, 2, 2, None, 1, 7, 1, 4, 13, 15, 2, 9, 2, 6, 10, 2, 2, 15, 6, None, 2, 5, 17, 5, 1, 6, 11, None, 15, 9, 4, 2, 6, 8, 1, 1, 5, None, 2, None, 2, 4, None, 1, 4, 2, 9, 13, 15, 23, None, 5, 3, 24, 4, 12, 2, 11, None, 7, 10, 4, 1, 11, 4, 13, 1, 1, 11, None, 9, 1, 2, 5, 6, 6, 1, 6, 17, 1, 2, 9, 2, 19, None, None, 7, 5, 1, 1, 7, None, 6, 2, 1, 11, 2, 7, 11, 1, 13, 17, None, 14, 2, 1, 2, 14, 2, 1, 2, 1, 1, 1, 13, 7, 2, 2, 9, 7, 1, 2, None, None, None, 2, 19, 1, 2, 2, 4, 18, None, 4, 1, 1, 12, 6, 4, 2, 2, 1, 18, 7, 15, None, 2, 5, 14, 2, 1, 22, 3, 24, 2, 4, 3, 1, 1, 4, 2, None, 2, 5, 9, 13, None, 9, 2, 6, 1, 1, 2, 8, 4, None, None, None, None, 5, 1, 1, 2, 1, 1, 1, 7, 19, 3, None, 7, 4, 2, 2, 2, 14, 1, 2, None, 1, 6, 21, 1, 19, 2, 4, 13, 7, 10, None, 8, 7, 1, 3, None, 1, 4, None, None, None, 2, 2, 2, 6, 1, 2, 13, 7, 2, 7, 16, 6, 13, 1, 1, 7, 1, 13, None, 2, 5, 6, 12, 2, 7, None, 8, 3, 2, 1, 24, 10, None, 14, 2, 1, 5, None, 2, 5, 1, 12, 8, 1, 2, None, 2, 3, 13, 7, 1, 13, 6, 1, None, 7, None, 2, 1, 2, 1, 7, 1, 2, None, 11, 22, 13, 5, 1, 17, 2, 6, 11, 1, 2, None, 28, 
7, None, 6, 7, 24, 12, 11, None, 2, 2, 24, 8, 6, 2, 5, None, None, 7, 9, 2, 9, 1, 13, 8, None, 1, 1, 15, 1, 8, 1, 1, 6, 1, 1, None, 9, 1, 13, 1, 1, 13, 6, None, 1, 9, 24, 2, 10, 1, 1, 11, 1, 6, None, 1, 14, 2, 9, None, 2, 6, 1, 4, None, 1, 24, 2, 9, 1, 4, 1, 2, 2, 3, 1, 14, 5, None, 2, 1, 4, 2, 13, 6, 1, 1, 8, 3, 7, 6, 2, 14, 2, 1, 13, 17, None] - - self.assertEqual(doc._.family_sizes,family_sizes) - - def test_mean_family_size(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - mean_family_size = 8.439169139465875 - self.assertEqual(doc._.mean_family_size,mean_family_size) - - def test_med_family_size(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_family_size,7.0) - - def test_max_family_size(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_family_size,23.0) - - def test_min_family_size(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_family_size,1.0) - - def test_std_family_size(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_family_size,5.163690473645397) - - def test_sensenums(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - nSenses = [None, 7, 13, 2, 2, None, 22, 15, 52, 5, 3, 4, 8, 7, None, 6, 2, None, 1, 4, 20, 14, 6, 15, 7, 5, None, None, 19, 14, None, 6, 20, 20, 5, 3, None, 7, 8, 8, 5, 7, 3, 10, 3, None, 2, 20, 3, None, 9, None, 5, 6, 6, None, 34, 7, 1, 2, None, 57, 5, 1, 5, 3, 4, None, None, None, 2, None, 9, 14, 18, None, None, 1, 6, 1, None, 7, 2, None, 13, 5, 5, 1, 12, None, 14, None, 1, 11, None, None, 17, 17, None, 8, 5, 6, 14, 13, 7, 5, 6, None, 2, 5, 6, None, 9, None, 2, None, None, 10, 2, None, 20, 14, 1, 2, None, 38, None, None, 1, 2, None, None, None, 7, 4, None, 12, None, 4, 20, None, 7, 3, 7, None, 2, None, 4, None, 14, None, None, None, None, 7, 14, None, 1, 7, 2, 5, 2, 4, None, 7, None, 6, None, 14, 1, None, 3, 3, 2, None, 14, 28, 2, None, 
7, None, 9, None, 3, None, None, 7, 5, None, 2, None, 2, 8, 4, None, None, 2, 4, None, 7, None, 18, 10, 2, None, 11, None, 5, 8, None, 3, None, 2, 23, 8, None, 15, 2, 51, None, 3, 1, None, None, 4, None, None, 2, 5, 2, 16, 1, 3, 2, None, 1, None, 30, None, 4, 1, None, None, 2, 6, 3, None, 2, 10, None, 4, None, None, 5, None, 2, None, 4, 3, 22, 1, 15, None, 6, None, 19, 5, 1, None, None, 2, 16, 1, 2, None, 7, None, 4, None, 14, None, None, None, 7, 4, None, 2, 22, 3, None, 1, 9, 8, 12, 3, None, 1, 2, None, 6, 12, 16, None, 16, 1, 2, None, 2, None, None, 6, None, None, 2, None, 2, 20, 10, 7, 7, None, 5, 3, 5, 7, 2, 18, None, None, 7, 7, 11, None, 5, 9, None, 2, 6, 4, 7, 7, 1, None, None, 6, None, None, None, 3, 7, None, 3, 2, 5, 6, 20, 6, 4, None, 11, None, 14, 1, 7, 5, 3, None, 10, 19, 6, None, 2, 3, 5, 2, 10, 4, None, 5, None, None, 5, 3, 4, None, 1, 3, None, 3, 1, None, 3, None, None, 7, 7, None, None, 7, None, None, 8, 14, 25, None, 2, 22, None, 4, 3, None, 9, None, None, 3, 3, None, None, 3, None, None, 1, 20, 10, None, 1, 1, 17, None, 5, None, None, None, 7, 4, 1, 3, None, 4, 5, None, 6, 3, None, 3, 4, 2, None, 7, 5, 4, 7, 13, None, None, 10, 36, 2, None, 15, None, 14, 1, 37, 9, None, None, 4, 1, None, None, 7, 1, 2, None, 6, 8, 2, None, None, 5, 4, 14, None, None, None, None, 9, None, None, 1, None, None, 1, None, 9, 4, None, 30, None, 5, 5, 3, 11, 1, 2, None, None, 3, 4, None, 5, None, 5, 20, 3, 8, None, None, 2, None, 7, 2, None, 5, 3, None, None, None, 21, 6, 7, None, 5, 9, 7, None, 2, 4, 7, 9, None, None, 7, None, 3, None, 2, 7, None, 2, 5, 2, None, 4, 4, 8, 8, 14, 1, None, 9, None, None, 11, 1, 2, 7, None, 10, 5, None, 1, None, None, 1, 20, 3, None, 4, 10, None, 1, 10, None, None, 6, None, None, 2, None, 1, None, 22, 15, 4, 4, None, 2, 7, 9, 22, None, 1, None, 13, 2, None, 4, 7, 14, 45, 17, None, None, 1, 14, 1, 6, None, 2, None, None, 2, 6, 5, 30, None, 4, 8, None, None, None, 4, None, 18, None, None, 2, None, 1, None, 45, None, 5, None, None, 4, 10, 
None, 4, 6, 14, 6, 9, None, None, 17, None, 3, None, 1, 12, None, 9, None, None, None, None, 8, None, 4, 14, 6, 7, None, 11, None, 7, 2, 4, None, 12, 12, None, 1, 4, 6, 7, 3, 6, None, None, 4, None, 2, 7, None, 12, 2, None, 4, 3, None] - self.assertEqual(doc._.sensenums,nSenses) - - def test_mean_nSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - mean_nSenses = 7.238235294117647 - self.assertEqual(doc._.mean_nSenses,mean_nSenses) - - def test_med_nSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_nSenses,4.5) - - def test_max_nSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_nSenses,57.0) - - def test_min_nSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_nSenses,1.0) - - def test_std_nSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_nSenses,8.193403876479936) - - def test_log_sensenums(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - logsensenums=[None, 1.9459101490553132, 2.5649493574615367, 0.6931471805599453, 0.6931471805599453, None, 3.091042453358316, 2.70805020110221, 3.9512437185814275, 1.6094379124341003, 1.0986122886681098, 1.3862943611198906, 2.0794415416798357, 1.9459101490553132, None, 1.791759469228055, 0.6931471805599453, None, 0.0, 1.3862943611198906, 2.995732273553991, 2.6390573296152584, 1.791759469228055, 2.70805020110221, 1.9459101490553132, 1.6094379124341003, None, None, 2.9444389791664403, 2.6390573296152584, None, 1.791759469228055, 2.995732273553991, 2.995732273553991, 1.6094379124341003, 1.0986122886681098, None, 1.9459101490553132, 2.0794415416798357, 2.0794415416798357, 1.6094379124341003, 1.9459101490553132, 1.0986122886681098, 2.302585092994046, 1.0986122886681098, None, 0.6931471805599453, 2.995732273553991, 1.0986122886681098, None, 2.1972245773362196, None, 1.6094379124341003, 1.791759469228055, 
1.791759469228055, None, 3.5263605246161616, 1.9459101490553132, 0.0, 0.6931471805599453, None, 4.04305126783455, 1.6094379124341003, 0.0, 1.6094379124341003, 1.0986122886681098, 1.3862943611198906, None, None, None, 0.6931471805599453, None, 2.1972245773362196, 2.6390573296152584, 2.8903717578961645, None, None, 0.0, 1.791759469228055, 0.0, None, 1.9459101490553132, 0.6931471805599453, None, 2.5649493574615367, 1.6094379124341003, 1.6094379124341003, 0.0, 2.4849066497880004, None, 2.6390573296152584, None, 0.0, 2.3978952727983707, None, None, 2.833213344056216, 2.833213344056216, None, 2.0794415416798357, 1.6094379124341003, 1.791759469228055, 2.6390573296152584, 2.5649493574615367, 1.9459101490553132, 1.6094379124341003, 1.791759469228055, None, 0.6931471805599453, 1.6094379124341003, 1.791759469228055, None, 2.1972245773362196, None, 0.6931471805599453, None, None, 2.302585092994046, 0.6931471805599453, None, 2.995732273553991, 2.6390573296152584, 0.0, 0.6931471805599453, None, 3.6375861597263857, None, None, 0.0, 0.6931471805599453, None, None, None, 1.9459101490553132, 1.3862943611198906, None, 2.4849066497880004, None, 1.3862943611198906, 2.995732273553991, None, 1.9459101490553132, 1.0986122886681098, 1.9459101490553132, None, 0.6931471805599453, None, 1.3862943611198906, None, 2.6390573296152584, None, None, None, None, 1.9459101490553132, 2.6390573296152584, None, 0.0, 1.9459101490553132, 0.6931471805599453, 1.6094379124341003, 0.6931471805599453, 1.3862943611198906, None, 1.9459101490553132, None, 1.791759469228055, None, 2.6390573296152584, 0.0, None, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453, None, 2.6390573296152584, 3.332204510175204, 0.6931471805599453, None, 1.9459101490553132, None, 2.1972245773362196, None, 1.0986122886681098, None, None, 1.9459101490553132, 1.6094379124341003, None, 0.6931471805599453, None, 0.6931471805599453, 2.0794415416798357, 1.3862943611198906, None, None, 0.6931471805599453, 1.3862943611198906, None, 
1.9459101490553132, None, 2.8903717578961645, 2.302585092994046, 0.6931471805599453, None, 2.3978952727983707, None, 1.6094379124341003, 2.0794415416798357, None, 1.0986122886681098, None, 0.6931471805599453, 3.1354942159291497, 2.0794415416798357, None, 2.70805020110221, 0.6931471805599453, 3.9318256327243257, None, 1.0986122886681098, 0.0, None, None, 1.3862943611198906, None, None, 0.6931471805599453, 1.6094379124341003, 0.6931471805599453, 2.772588722239781, 0.0, 1.0986122886681098, 0.6931471805599453, None, 0.0, None, 3.4011973816621555, None, 1.3862943611198906, 0.0, None, None, 0.6931471805599453, 1.791759469228055, 1.0986122886681098, None, 0.6931471805599453, 2.302585092994046, None, 1.3862943611198906, None, None, 1.6094379124341003, None, 0.6931471805599453, None, 1.3862943611198906, 1.0986122886681098, 3.091042453358316, 0.0, 2.70805020110221, None, 1.791759469228055, None, 2.9444389791664403, 1.6094379124341003, 0.0, None, None, 0.6931471805599453, 2.772588722239781, 0.0, 0.6931471805599453, None, 1.9459101490553132, None, 1.3862943611198906, None, 2.6390573296152584, None, None, None, 1.9459101490553132, 1.3862943611198906, None, 0.6931471805599453, 3.091042453358316, 1.0986122886681098, None, 0.0, 2.1972245773362196, 2.0794415416798357, 2.4849066497880004, 1.0986122886681098, None, 0.0, 0.6931471805599453, None, 1.791759469228055, 2.4849066497880004, 2.772588722239781, None, 2.772588722239781, 0.0, 0.6931471805599453, None, 0.6931471805599453, None, None, 1.791759469228055, None, None, 0.6931471805599453, None, 0.6931471805599453, 2.995732273553991, 2.302585092994046, 1.9459101490553132, 1.9459101490553132, None, 1.6094379124341003, 1.0986122886681098, 1.6094379124341003, 1.9459101490553132, 0.6931471805599453, 2.8903717578961645, None, None, 1.9459101490553132, 1.9459101490553132, 2.3978952727983707, None, 1.6094379124341003, 2.1972245773362196, None, 0.6931471805599453, 1.791759469228055, 1.3862943611198906, 1.9459101490553132, 1.9459101490553132, 
0.0, None, None, 1.791759469228055, None, None, None, 1.0986122886681098, 1.9459101490553132, None, 1.0986122886681098, 0.6931471805599453, 1.6094379124341003, 1.791759469228055, 2.995732273553991, 1.791759469228055, 1.3862943611198906, None, 2.3978952727983707, None, 2.6390573296152584, 0.0, 1.9459101490553132, 1.6094379124341003, 1.0986122886681098, None, 2.302585092994046, 2.9444389791664403, 1.791759469228055, None, 0.6931471805599453, 1.0986122886681098, 1.6094379124341003, 0.6931471805599453, 2.302585092994046, 1.3862943611198906, None, 1.6094379124341003, None, None, 1.6094379124341003, 1.0986122886681098, 1.3862943611198906, None, 0.0, 1.0986122886681098, None, 1.0986122886681098, 0.0, None, 1.0986122886681098, None, None, 1.9459101490553132, 1.9459101490553132, None, None, 1.9459101490553132, None, None, 2.0794415416798357, 2.6390573296152584, 3.2188758248682006, None, 0.6931471805599453, 3.091042453358316, None, 1.3862943611198906, 1.0986122886681098, None, 2.1972245773362196, None, None, 1.0986122886681098, 1.0986122886681098, None, None, 1.0986122886681098, None, None, 0.0, 2.995732273553991, 2.302585092994046, None, 0.0, 0.0, 2.833213344056216, None, 1.6094379124341003, None, None, None, 1.9459101490553132, 1.3862943611198906, 0.0, 1.0986122886681098, None, 1.3862943611198906, 1.6094379124341003, None, 1.791759469228055, 1.0986122886681098, None, 1.0986122886681098, 1.3862943611198906, 0.6931471805599453, None, 1.9459101490553132, 1.6094379124341003, 1.3862943611198906, 1.9459101490553132, 2.5649493574615367, None, None, 2.302585092994046, 3.58351893845611, 0.6931471805599453, None, 2.70805020110221, None, 2.6390573296152584, 0.0, 3.6109179126442243, 2.1972245773362196, None, None, 1.3862943611198906, 0.0, None, None, 1.9459101490553132, 0.0, 0.6931471805599453, None, 1.791759469228055, 2.0794415416798357, 0.6931471805599453, None, None, 1.6094379124341003, 1.3862943611198906, 2.6390573296152584, None, None, None, None, 2.1972245773362196, None, None, 
0.0, None, None, 0.0, None, 2.1972245773362196, 1.3862943611198906, None, 3.4011973816621555, None, 1.6094379124341003, 1.6094379124341003, 1.0986122886681098, 2.3978952727983707, 0.0, 0.6931471805599453, None, None, 1.0986122886681098, 1.3862943611198906, None, 1.6094379124341003, None, 1.6094379124341003, 2.995732273553991, 1.0986122886681098, 2.0794415416798357, None, None, 0.6931471805599453, None, 1.9459101490553132, 0.6931471805599453, None, 1.6094379124341003, 1.0986122886681098, None, None, None, 3.044522437723423, 1.791759469228055, 1.9459101490553132, None, 1.6094379124341003, 2.1972245773362196, 1.9459101490553132, None, 0.6931471805599453, 1.3862943611198906, 1.9459101490553132, 2.1972245773362196, None, None, 1.9459101490553132, None, 1.0986122886681098, None, 0.6931471805599453, 1.9459101490553132, None, 0.6931471805599453, 1.6094379124341003, 0.6931471805599453, None, 1.3862943611198906, 1.3862943611198906, 2.0794415416798357, 2.0794415416798357, 2.6390573296152584, 0.0, None, 2.1972245773362196, None, None, 2.3978952727983707, 0.0, 0.6931471805599453, 1.9459101490553132, None, 2.302585092994046, 1.6094379124341003, None, 0.0, None, None, 0.0, 2.995732273553991, 1.0986122886681098, None, 1.3862943611198906, 2.302585092994046, None, 0.0, 2.302585092994046, None, None, 1.791759469228055, None, None, 0.6931471805599453, None, 0.0, None, 3.091042453358316, 2.70805020110221, 1.3862943611198906, 1.3862943611198906, None, 0.6931471805599453, 1.9459101490553132, 2.1972245773362196, 3.091042453358316, None, 0.0, None, 2.5649493574615367, 0.6931471805599453, None, 1.3862943611198906, 1.9459101490553132, 2.6390573296152584, 3.8066624897703196, 2.833213344056216, None, None, 0.0, 2.6390573296152584, 0.0, 1.791759469228055, None, 0.6931471805599453, None, None, 0.6931471805599453, 1.791759469228055, 1.6094379124341003, 3.4011973816621555, None, 1.3862943611198906, 2.0794415416798357, None, None, None, 1.3862943611198906, None, 2.8903717578961645, None, None, 
0.6931471805599453, None, 0.0, None, 3.8066624897703196, None, 1.6094379124341003, None, None, 1.3862943611198906, 2.302585092994046, None, 1.3862943611198906, 1.791759469228055, 2.6390573296152584, 1.791759469228055, 2.1972245773362196, None, None, 2.833213344056216, None, 1.0986122886681098, None, 0.0, 2.4849066497880004, None, 2.1972245773362196, None, None, None, None, 2.0794415416798357, None, 1.3862943611198906, 2.6390573296152584, 1.791759469228055, 1.9459101490553132, None, 2.3978952727983707, None, 1.9459101490553132, 0.6931471805599453, 1.3862943611198906, None, 2.4849066497880004, 2.4849066497880004, None, 0.0, 1.3862943611198906, 1.791759469228055, 1.9459101490553132, 1.0986122886681098, 1.791759469228055, None, None, 1.3862943611198906, None, 0.6931471805599453, 1.9459101490553132, None, 2.4849066497880004, 0.6931471805599453, None, 1.3862943611198906, 1.0986122886681098, None] - self.assertEqual(doc._.logsensenums,logsensenums) - - def test_mean_logNSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - mean_logNSenses = 1.5472290136608735 - self.assertEqual(doc._.mean_logNSenses,mean_logNSenses) - - def test_med_logNSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_logNSenses,1.4978661367769954) - - def test_max_logNSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_logNSenses,4.04305126783455) - - def test_min_logNSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_logNSenses,0.0) - - def test_std_logNSenses(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_logNSenses,0.9139907101609602) - - - def test_morpholex(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - morpholex = [None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(state)}>ment>', 'ROOT1_PFMF': '3.44', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': 
'451541', 'SUFF1_PFMF': '1.04', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(link)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '82099'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'RB', 'Nmorph': '4', 'PRS_signature': '"0,1,3"', 'MorphoLexSegm': '{(neg)>ate>}>ive>>ly>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '29916', 'SUFF1_PFMF': '29.7', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '22.79', 'SUFF2_FamSize': '580', 'SUFF2_Freq_HAL': '1037354', 'SUFF2_length': '3', 'SUFF2_P': '1.74E-05', 'SUFF2_P*': '2.78E-03', 'SUFF3_PFMF': '12.7', 'SUFF3_FamSize': '2898', 'SUFF3_Freq_HAL': '3857999', 'SUFF3_length': '2', 'SUFF3_P': '4.48E-05', 'SUFF3_P*': '0.02'}, None, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, {'POS': 'VB|NN|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(play)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '36', 'ROOT1_Freq_HAL': '458704'}, None, {'POS': 'JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(recent)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '134614'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': 
'"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(experience)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '297837'}, None, None, {'POS': 'JJ|NN|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(past)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '90058'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(century)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '39167'}, None, {'POS': 'RB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(sure)}>ly>', 'ROOT1_PFMF': '13.04', 'ROOT1_FamSize': '24', 'ROOT1_Freq_HAL': '362162', 'SUFF1_PFMF': '1.89', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(time)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '1099121'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(history)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '124154', 'SUFF1_PFMF': '0.44', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(live)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '291215'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(life)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '240644'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': 
'{(change)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '348247'}, None, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(drama)}>ic>>ly>', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '16813', 'SUFF1_PFMF': '6.41', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03', 'SUFF2_PFMF': '5.62', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, None, None, {'POS': 'JJ|NN|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(quick)}', 'ROOT1_PFMF': '9.09', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '92596'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '9.43', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '7.69', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '38887', 'SUFF1_PFMF': '12.14', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(typo)}>al>', 'ROOT1_PFMF': '2.85', 'ROOT1_FamSize': '36', 'ROOT1_Freq_HAL': '282037', 'SUFF1_PFMF': '3.42', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(day)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '35', 'ROOT1_Freq_HAL': '778343'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(reveal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '32067'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 
'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'VB', 'Nmorph': '4', 'PRS_signature': '"1,1,2"', 'MorphoLexSegm': '{ion>}>ize>', 'PREF1_PFMF': '37.55', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '58.82', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '158531', 'SUFF1_PFMF': '38.92', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03', 'SUFF2_PFMF': '20.74', 'SUFF2_FamSize': '430', 'SUFF2_Freq_HAL': '443161', 'SUFF2_length': '3', 'SUFF2_P': '1.49E-04', 'SUFF2_P*': '0.01'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(world)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '345235'}, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{al>}', 'PREF1_PFMF': '3.08', 'PREF1_FamSize': '358', 'PREF1_Freq_HAL': '2402388', 'PREF1_length': '2', 'PREF1_P': '6.66E-06', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '84483', 'SUFF1_PFMF': '1.95', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(combust)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '1982', 'SUFF1_PFMF': '27.97', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(engine)}', 'ROOT1_PFMF': '100', 
'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '194206'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(work)}{(day)}', 'ROOT1_PFMF': '38.82', 'ROOT1_FamSize': '86', 'ROOT1_Freq_HAL': '1051110', 'ROOT2_PFMF': '67.64', 'ROOT2_FamSize': '35', 'ROOT2_Freq_HAL': '778343'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(chance)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '72023'}, None, {'POS': 'JJ|RB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(high)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '24', 'ROOT1_Freq_HAL': '339513'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(employ)}>ee>', 'ROOT1_PFMF': '12.5', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '116155', 'SUFF1_PFMF': '4.25', 'SUFF1_FamSize': '48', 'SUFF1_Freq_HAL': '130049', 'SUFF1_length': '2', 'SUFF1_P': '1.54E-05', 'SUFF1_P*': '3.09E-04'}, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 'er>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '344318', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '510787', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN|JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(silic)>on>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '12811', 'SUFF1_PFMF': '10.52', 'SUFF1_FamSize': '20', 'SUFF1_Freq_HAL': '128335', 'SUFF1_length': '2', 'SUFF1_P': '0', 'SUFF1_P*': '0'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(bridge)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 
'ROOT1_Freq_HAL': '26961'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(micro)>on>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '2788', 'SUFF1_PFMF': '47.36', 'SUFF1_FamSize': '20', 'SUFF1_Freq_HAL': '128335', 'SUFF1_length': '2', 'SUFF1_P': '0', 'SUFF1_P*': '0'}, {'POS': 'JJ|RB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(wide)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '118804'}, None, {'POS': 'minor|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(upon)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '78918'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(leave)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '122742'}, {'POS': 'NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(home)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '29', 'ROOT1_Freq_HAL': '216780'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(family)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '145491'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(member)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '163906'}, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(reach)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '76970'}, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(wire)}>less>', 'ROOT1_PFMF': '8.33', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '37645', 'SUFF1_PFMF': '0.54', 'SUFF1_FamSize': '368', 'SUFF1_Freq_HAL': '158354', 'SUFF1_length': '4', 'SUFF1_P': '1.20E-04', 'SUFF1_P*': '2.94E-03'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(net)}{(work)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '297666', 'ROOT2_PFMF': '1.17', 'ROOT2_FamSize': 
'86', 'ROOT2_Freq_HAL': '1051110'}, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(util)>ize>}', 'ROOT1_PFMF': '20', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '57081', 'SUFF1_PFMF': '1.63', 'SUFF1_FamSize': '430', 'SUFF1_Freq_HAL': '443161', 'SUFF1_length': '3', 'SUFF1_P': '1.49E-04', 'SUFF1_P*': '0.01'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(satellite)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '19083'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(orbit)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '11239'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(earth)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '77215'}, None, None, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(common)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '142732'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(occur)}>ance>', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '53283', 'SUFF1_PFMF': '13.04', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(could)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '610350'}, None, None, {'POS': 'JJ', 'Nmorph': '4', 'PRS_signature': '"2,1,1"', 'MorphoLexSegm': 'able>', 'PREF1_PFMF': '18.87', 'PREF1_FamSize': '658', 'PREF1_Freq_HAL': '610746', 'PREF1_length': '2', 'PREF1_P': '4.26E-05', 'PREF1_P*': '4.02E-03', 'PREF2_PFMF': '42.27', 'PREF2_FamSize': '370', 'PREF2_Freq_HAL': '1256048', 'PREF2_length': '2', 'PREF2_P': '1.51E-05', 'PREF2_P*': '2.94E-03', 'ROOT1_PFMF': '42.1', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '181726', 'SUFF1_PFMF': '19.74', 'SUFF1_FamSize': '872', 'SUFF1_Freq_HAL': 
'1227992', 'SUFF1_length': '4', 'SUFF1_P': '3.18E-05', 'SUFF1_P*': '6.03E-03'}, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(turn)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '195606'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(century)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '39167'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(state)}>ment>', 'ROOT1_PFMF': '3.44', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541', 'SUFF1_PFMF': '1.04', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(attempt)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '78959'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(bridge)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '26961'}, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(drama)}>ic>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '16813', 'SUFF1_PFMF': '4.73', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(change)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '348247'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>}', 'PREF1_PFMF': '7.63', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '10.6', 'ROOT1_FamSize': '67', 'ROOT1_Freq_HAL': '559015', 'SUFF1_PFMF': '8.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': 
'9.11E-03'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(able)}>ity>', 'ROOT1_PFMF': '10', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '328113', 'SUFF1_PFMF': '0.86', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, None, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(assumption)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '17588'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ance>', 'ROOT1_PFMF': '71.42', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '46378', 'SUFF1_PFMF': '22.98', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(neg)>ate>}', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '29916', 'SUFF1_PFMF': '22.11', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(need)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 
'ROOT1_Freq_HAL': '591543'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(create)}>ive>>ly>', 'ROOT1_PFMF': '63.63', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932', 'SUFF1_PFMF': '30.39', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1037354', 'SUFF1_length': '3', 'SUFF1_P': '1.74E-05', 'SUFF1_P*': '2.78E-03', 'SUFF2_PFMF': '17.12', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(solve)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '15', 'ROOT1_Freq_HAL': '67167'}, {'POS': 'JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(previous)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '87239'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(quandary)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '184'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(look)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '628585'}, {'POS': 'RB|JJ|NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(back)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '69', 'ROOT1_Freq_HAL': '549038'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>}', 'PREF1_PFMF': '8.33', 'PREF1_FamSize': '13', 'PREF1_Freq_HAL': '58022', 'PREF1_length': '5', 'PREF1_P': '0', 'PREF1_P*': '0', 'ROOT1_PFMF': '7.57', 'ROOT1_FamSize': '67', 'ROOT1_Freq_HAL': '559015', 'SUFF1_PFMF': '4.69', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': 
'6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'minor|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(one)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '2327675'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(could)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '610350'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(argue)}', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '113797'}, None, {'POS': 'minor|NN|RB', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(with)}{(out)}', 'ROOT1_PFMF': '6.66', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '3580107', 'ROOT2_PFMF': '2.94', 'ROOT2_FamSize': '35', 'ROOT2_Freq_HAL': '1415807'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(car)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '121302'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(compute)}>er>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '344318', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(mobile)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '19217'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(phon)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '41', 'ROOT1_Freq_HAL': '207292'}, None, None, {'POS': 'JJ', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{al>', 'PREF1_PFMF': '5.55', 'PREF1_FamSize': '19', 'PREF1_Freq_HAL': '17644', 'PREF1_length': '4', 'PREF1_P': '1.70E-04', 'PREF1_P*': '4.63E-04', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '7461', 'SUFF1_PFMF': '15.52', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': 
'4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(work)}>er>', 'ROOT1_PFMF': '2.35', 'ROOT1_FamSize': '86', 'ROOT1_Freq_HAL': '1051110', 'SUFF1_PFMF': '1.75', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(would)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '1366583'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(need)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '591543'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(find)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '435097'}, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(altern)>ate>)}', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '60362', 'SUFF1_PFMF': '5.12', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(method)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '92974'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '510787', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ate>}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '144552', 'SUFF1_PFMF': '0.1', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '0.43', 'SUFF2_FamSize': '1599', 'SUFF2_Freq_HAL': 
'6530204', 'SUFF2_length': '3', 'SUFF2_P': '9.03E-06', 'SUFF2_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'JJ|NN|RB|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(short)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '134285'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(circuit)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '24657'}, None, {'POS': 'VB|NN|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(make)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '40', 'ROOT1_Freq_HAL': '1073485'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(problem)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '510114'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(obsolete)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '4914'}, None, None, {'POS': 'RB|minor', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(how)}{(ever)}', 'ROOT1_PFMF': '20', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '1012869', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '22', 'ROOT2_Freq_HAL': '564255'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(rely)}>ance>', 'ROOT1_PFMF': '71.42', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '46378', 'SUFF1_PFMF': '22.98', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 
'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(necess)>ory>}>ly>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '107363', 'SUFF1_PFMF': '1.12', 'SUFF1_FamSize': '356', 'SUFF1_Freq_HAL': '725186', 'SUFF1_length': '3', 'SUFF1_P': '2.62E-05', 'SUFF1_P*': '2.94E-03', 'SUFF2_PFMF': '1.17', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ive>>ity>', 'ROOT1_PFMF': '36.36', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932', 'SUFF1_PFMF': '10.53', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1037354', 'SUFF1_length': '3', 'SUFF1_P': '1.74E-05', 'SUFF1_P*': '2.78E-03', 'SUFF2_PFMF': '11.57', 'SUFF2_FamSize': '580', 'SUFF2_Freq_HAL': '1647588', 'SUFF2_length': '3', 'SUFF2_P': '1.76E-05', 'SUFF2_P*': '4.48E-03'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(mark)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '29', 'ROOT1_Freq_HAL': '181577'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(specie)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '249'}, None, None, {'POS': 'RB|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(prior)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '43904'}, {'POS': 'NN', 
'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(example)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '171361'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(reveal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '32067'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(allow)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '185956'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(convenience)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '8009'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(car)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '121302'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(compute)}>er>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '344318', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '2274', 'SUFF1_Freq_HAL': '4569119', 'SUFF1_length': '2', 'SUFF1_P': '3.55E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(phon)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '41', 'ROOT1_Freq_HAL': '207292'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(release)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '144416'}, {'POS': 'JJ', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(add)}>ion>>al>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '332926', 'SUFF1_PFMF': 
'1.25', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03', 'SUFF2_PFMF': '1.04', 'SUFF2_FamSize': '1431', 'SUFF2_Freq_HAL': '4704731', 'SUFF2_length': '2', 'SUFF2_P': '9.14E-06', 'SUFF2_P*': '6.64E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(time)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '1099121'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(people)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '798705'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(live)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '291215'}, None, {'POS': 'RB', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(efficient)}>ly>', 'ROOT1_PFMF': '33.33', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '19176', 'SUFF1_PFMF': '6.86', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(efficiency)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '8343'}, None, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(human)}>ity>', 'ROOT1_PFMF': '5.88', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937', 'SUFF1_PFMF': '4.14', 'SUFF1_FamSize': 
'580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(tackle)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '3482'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(problem)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '510114'}, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(may)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '538146'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(create)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(issue)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '221311'}, None, None, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(exist)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '141837'}, {'POS': 'minor|NN|RB', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(with)}{(out)}', 'ROOT1_PFMF': '6.66', 'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '3580107', 'ROOT2_PFMF': '2.94', 'ROOT2_FamSize': '35', 'ROOT2_Freq_HAL': '1415807'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 
'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(example)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '171361'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(proliferate)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '1921', 'SUFF1_PFMF': '29.97', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '17.61', 'PREF1_FamSize': '370', 'PREF1_Freq_HAL': '1256048', 'PREF1_length': '2', 'PREF1_P': '1.51E-05', 'PREF1_P*': '2.94E-03', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '43', 'ROOT1_Freq_HAL': '651056', 'SUFF1_PFMF': '17.14', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(globe)}>al>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '38772', 'SUFF1_PFMF': '2.65', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(scale)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '32879'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '63237', 'SUFF1_PFMF': '0.52', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{al>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '38772', 'SUFF1_PFMF': '2.65', 'SUFF1_FamSize': '1431', 
'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN|VB|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(warm)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '29760'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(become)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '143379'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(concern)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '87873'}, {'POS': 'JJ', 'Nmorph': '4', 'PRS_signature': '"2,1,1"', 'MorphoLexSegm': 'able>', 'PREF1_PFMF': '18.87', 'PREF1_FamSize': '658', 'PREF1_Freq_HAL': '610746', 'PREF1_length': '2', 'PREF1_P': '4.26E-05', 'PREF1_P*': '4.02E-03', 'PREF2_PFMF': '42.27', 'PREF2_FamSize': '370', 'PREF2_Freq_HAL': '1256048', 'PREF2_length': '2', 'PREF2_P': '1.51E-05', 'PREF2_P*': '2.94E-03', 'ROOT1_PFMF': '42.1', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '181726', 'SUFF1_PFMF': '19.74', 'SUFF1_FamSize': '872', 'SUFF1_Freq_HAL': '1227992', 'SUFF1_length': '4', 'SUFF1_P': '3.18E-05', 'SUFF1_P*': '6.03E-03'}, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(horse)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '35', 'ROOT1_Freq_HAL': '39630'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(buggy)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '4547'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(gener)>ate>}>ion>', 'ROOT1_PFMF': '15.78', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '318254', 'SUFF1_PFMF': '1.7', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '2.31', 'SUFF2_FamSize': '1599', 'SUFF2_Freq_HAL': '6530204', 'SUFF2_length': '3', 'SUFF2_P': '9.03E-06', 'SUFF2_P*': '9.11E-03'}, None, {'POS': 'RB', 
'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(like)}>wise>', 'ROOT1_PFMF': '6.84', 'ROOT1_FamSize': '74', 'ROOT1_Freq_HAL': '1232275', 'SUFF1_PFMF': '5.55', 'SUFF1_FamSize': '19', 'SUFF1_Freq_HAL': '77424', 'SUFF1_length': '4', 'SUFF1_P': '2.58E-05', 'SUFF1_P*': '3.09E-04'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ance>', 'PREF1_PFMF': '14.23', 'PREF1_FamSize': '275', 'PREF1_Freq_HAL': '559431', 'PREF1_length': '2', 'PREF1_P': '4.65E-05', 'PREF1_P*': '4.02E-03', 'ROOT1_PFMF': '33.33', 'ROOT1_FamSize': '34', 'ROOT1_Freq_HAL': '169032', 'SUFF1_PFMF': '20.8', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(oil)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '15', 'ROOT1_Freq_HAL': '39902'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(create)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '227932'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(nation)}', 'ROOT1_PFMF': '9.09', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '303858'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(state)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541'}, None, None, None, {'POS': 'JJ|NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ant>', 'PREF1_PFMF': '4.01', 'PREF1_FamSize': '275', 'PREF1_Freq_HAL': '559431', 'PREF1_length': '2', 'PREF1_P': '4.65E-05', 'PREF1_P*': '4.02E-03', 'ROOT1_PFMF': '6.06', 'ROOT1_FamSize': '34', 'ROOT1_Freq_HAL': '169032', 'SUFF1_PFMF': '5.61', 'SUFF1_FamSize': '464', 'SUFF1_Freq_HAL': '1534593', 'SUFF1_length': '3', 'SUFF1_P': '7.82E-06', 'SUFF1_P*': '1.85E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(tax)}>ion>', 'ROOT1_PFMF': '33.33', 'ROOT1_FamSize': '10', 
'ROOT1_Freq_HAL': '112326', 'SUFF1_PFMF': '16.52', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(allow)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '185956'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(rule)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '149114'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(party)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '91368'}, None, {'POS': 'VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ity>', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '48418', 'SUFF1_PFMF': '2.76', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(group)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '401691'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(woman)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '38', 'ROOT1_Freq_HAL': '74275'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(solut)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '83940', 'SUFF1_PFMF': '0.5', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696', 'SUFF1_PFMF': '9.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': 
'{(maverick)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '1631'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(sci)>ant>>ist>}', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '204888', 'SUFF1_PFMF': '3.23', 'SUFF1_FamSize': '464', 'SUFF1_Freq_HAL': '1534593', 'SUFF1_length': '3', 'SUFF1_P': '7.82E-06', 'SUFF1_P*': '1.85E-03', 'SUFF2_PFMF': '0.21', 'SUFF2_FamSize': '462', 'SUFF2_Freq_HAL': '382916', 'SUFF2_length': '3', 'SUFF2_P': '6.27E-05', 'SUFF2_P*': '3.71E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(politic)}>ian>', 'ROOT1_PFMF': '10', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '92034', 'SUFF1_PFMF': '4.04', 'SUFF1_FamSize': '174', 'SUFF1_Freq_HAL': '394113', 'SUFF1_length': '3', 'SUFF1_P': '2.79E-05', 'SUFF1_P*': '1.70E-03'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(contrast)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '10700'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(state)}>ment>', 'ROOT1_PFMF': '3.44', 'ROOT1_FamSize': '30', 'ROOT1_Freq_HAL': '451541', 'SUFF1_PFMF': '1.04', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, None, None, None, {'POS': 'RB|JJ|NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(even)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '494850'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(see)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '21', 'ROOT1_Freq_HAL': '800097'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': 
'3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(imagine)}>ion>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696', 'SUFF1_PFMF': '9.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(consider)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '14', 'ROOT1_Freq_HAL': '214085'}, None, None, {'POS': 'NN|JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(digit)}>al>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '53861', 'SUFF1_PFMF': '1.32', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>}', 'PREF1_PFMF': '5.42', 'PREF1_FamSize': '499', 'PREF1_Freq_HAL': '2175658', 'PREF1_length': '2', 'PREF1_P': '1.38E-05', 'PREF1_P*': '4.63E-03', 'ROOT1_PFMF': '11.76', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '158531', 'SUFF1_PFMF': '6.19', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(advent)}', 'ROOT1_PFMF': '9.09', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '28013'}, None, None, None, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(allow)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': 
'185956'}, None, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 'ar>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '19', 'ROOT1_Freq_HAL': '93743', 'SUFF1_PFMF': '2.34', 'SUFF1_FamSize': '129', 'SUFF1_Freq_HAL': '533962', 'SUFF1_length': '2', 'SUFF1_P': '7.49E-06', 'SUFF1_P*': '6.18E-04'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(portal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '2394'}, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(medic)>al>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '65461', 'SUFF1_PFMF': '1.18', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(inform)}>ion>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '510787', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(permit)}', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '48680'}, {'POS': 'JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(patient)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '32789'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(self)}', 'ROOT1_PFMF': '22.72', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '458870'}, {'POS': 'NN|VB', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ly>', 'ROOT1_PFMF': '100', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '87239', 'SUFF1_PFMF': '1.82', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(close)}', 'ROOT1_PFMF': '0', 
'ROOT1_FamSize': '16', 'ROOT1_Freq_HAL': '188225'}, None, None, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(medic)>al>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '65461', 'SUFF1_PFMF': '1.18', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(lay)}(man)', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '25', 'ROOT1_Freq_HAL': '55753', 'ROOT2_PFMF': '10', 'ROOT2_FamSize': '211', 'ROOT2_Freq_HAL': '300379'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ory>', 'PREF1_PFMF': '20.35', 'PREF1_FamSize': '114', 'PREF1_Freq_HAL': '343106', 'PREF1_length': '5', 'PREF1_P': '8.74E-06', 'PREF1_P*': '4.63E-04', 'ROOT1_PFMF': '20', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '13680', 'SUFF1_PFMF': '21.97', 'SUFF1_FamSize': '356', 'SUFF1_Freq_HAL': '725186', 'SUFF1_length': '3', 'SUFF1_P': '2.62E-05', 'SUFF1_P*': '2.94E-03'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': 'ion>', 'PREF1_PFMF': '4.42', 'PREF1_FamSize': '114', 'PREF1_Freq_HAL': '343106', 'PREF1_length': '5', 'PREF1_P': '8.74E-06', 'PREF1_P*': '4.63E-04', 'ROOT1_PFMF': '17.24', 'ROOT1_FamSize': '59', 'ROOT1_Freq_HAL': '716577', 'SUFF1_PFMF': '7.07', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '19.88', 'PREF1_FamSize': '358', 'PREF1_Freq_HAL': '2402388', 'PREF1_length': '2', 'PREF1_P': '6.66E-06', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '6.25', 'ROOT1_FamSize': '17', 'ROOT1_Freq_HAL': '20108', 'SUFF1_PFMF': '19.14', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, {'POS': 'VB', 'Nmorph': '1', 
'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(arrive)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '37242'}, None, None, None, {'POS': 'JJ|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(surprise)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '52522'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(corner)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '32765'}, None, None, None, None, {'POS': 'minor|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(one)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '2327675'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(architect)}', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '30688'}, None, None, None, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(base)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '341952'}, None, {'POS': 'NN|minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(idea)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '203042'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(emergency)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '14348'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(care)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '174627'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(technique)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '44405'}, None, None, {'POS': 'JJ|RB', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': 'ly>', 'PREF1_PFMF': '0.87', 'PREF1_FamSize': '798', 'PREF1_Freq_HAL': '538545', 'PREF1_length': '2', 'PREF1_P': '7.24E-05', 'PREF1_P*': '6.03E-03', 'ROOT1_PFMF': '4.1', 'ROOT1_FamSize': '74', 'ROOT1_Freq_HAL': '1232275', 
'SUFF1_PFMF': '2.55', 'SUFF1_FamSize': '2898', 'SUFF1_Freq_HAL': '3857999', 'SUFF1_length': '2', 'SUFF1_P': '4.48E-05', 'SUFF1_P*': '0.02'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(marriage)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '27179'}, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(econom)>ic>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '84011', 'SUFF1_PFMF': '0.49', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(medicine)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '21940'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(heal)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '19345'}, {'POS': 'JJ|NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(tense)}', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '11367'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(environ)}>ment>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '105667', 'SUFF1_PFMF': '0.69', 'SUFF1_FamSize': '288', 'SUFF1_Freq_HAL': '1423689', 'SUFF1_length': '4', 'SUFF1_P': '1.26E-05', 'SUFF1_P*': '2.78E-03'}, None, None, None, None, None, None, None, None, None, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(last)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '306761'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(example)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '171361'}, {'POS': 'VB|NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 
'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(act)>al>}>y>', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '59', 'ROOT1_Freq_HAL': '716577', 'SUFF1_PFMF': '0', 'SUFF1_FamSize': '1431', 'SUFF1_Freq_HAL': '4704731', 'SUFF1_length': '2', 'SUFF1_P': '9.14E-06', 'SUFF1_P*': '6.64E-03', 'SUFF2_PFMF': '0.12', 'SUFF2_FamSize': '2486', 'SUFF2_Freq_HAL': '3870233', 'SUFF2_length': '1', 'SUFF2_P': '4.99E-05', 'SUFF2_P*': '0.02'}, {'POS': 'VB|NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ity>', 'ROOT1_PFMF': '5.88', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937', 'SUFF1_PFMF': '4.14', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ance>', 'ROOT1_PFMF': '71.42', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '46378', 'SUFF1_PFMF': '22.98', 'SUFF1_FamSize': '323', 'SUFF1_Freq_HAL': '977837', 'SUFF1_length': '4', 'SUFF1_P': '7.16E-06', 'SUFF1_P*': '1.08E-03'}, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'JJ', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': 'ate>}>ion>', 'PREF1_PFMF': '13.68', 'PREF1_FamSize': '96', 'PREF1_Freq_HAL': '397964', 'PREF1_length': '1', 'PREF1_P': '1.76E-05', 'PREF1_P*': '1.08E-03', 'ROOT1_PFMF': '66.66', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '33227', 'SUFF1_PFMF': '15.17', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 
'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01', 'SUFF2_PFMF': '19.21', 'SUFF2_FamSize': '1599', 'SUFF2_Freq_HAL': '6530204', 'SUFF2_length': '3', 'SUFF2_P': '9.03E-06', 'SUFF2_P*': '9.11E-03'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(small)}{(pox)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '191331', 'ROOT2_PFMF': '33.33', 'ROOT2_FamSize': '4', 'ROOT2_Freq_HAL': '1740'}, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(disease)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '30119'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(ravage)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '1089'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(race)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '111227'}, {'POS': 'minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(since)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '320454'}, {'POS': 'JJ', 'Nmorph': '4', 'PRS_signature': '"1,1,2"', 'MorphoLexSegm': 'ic>}>al>', 'PREF1_PFMF': '81.34', 'PREF1_FamSize': '135', 'PREF1_Freq_HAL': '155948', 'PREF1_length': '3', 'PREF1_P': '1.03E-04', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '83.33', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '124154', 'SUFF1_PFMF': '78.47', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03', 'SUFF2_PFMF': '83.77', 'SUFF2_FamSize': '1431', 'SUFF2_Freq_HAL': '4704731', 'SUFF2_length': '2', 'SUFF2_P': '9.14E-06', 'SUFF2_P*': '6.64E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(day)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '35', 
'ROOT1_Freq_HAL': '778343'}, None, None, {'POS': 'RB|minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(yet)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '159994'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(vaccine)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '13304'}, None, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, {'POS': 'VB|NN|JJ', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(think)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '746094'}, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(dare)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '15225'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(imagine)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(world)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '345235'}, {'POS': 'JJ|NN|VB|RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(free)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '28', 'ROOT1_Freq_HAL': '323757'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': 
'{(small)}{(pox)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '191331', 'ROOT2_PFMF': '33.33', 'ROOT2_FamSize': '4', 'ROOT2_Freq_HAL': '1740'}, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(use)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '15', 'ROOT1_Freq_HAL': '1414857'}, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(battle)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '40439'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(plan)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '3', 'ROOT1_Freq_HAL': '145043'}, None, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(draw)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '10', 'ROOT1_Freq_HAL': '76896'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,2,0"', 'MorphoLexSegm': '{(small)}{(pox)}', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '191331', 'ROOT2_PFMF': '33.33', 'ROOT2_FamSize': '4', 'ROOT2_Freq_HAL': '1740'}, None, {'POS': 'RB', 'Nmorph': '3', 'PRS_signature': '"0,1,2"', 'MorphoLexSegm': '{(system)}>ic>>ly>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '688810', 'SUFF1_PFMF': '14.11', 'SUFF1_FamSize': '1014', 'SUFF1_Freq_HAL': '1472797', 'SUFF1_length': '2', 'SUFF1_P': '2.78E-05', 'SUFF1_P*': '6.33E-03', 'SUFF2_PFMF': '10.42', 'SUFF2_FamSize': '2898', 'SUFF2_Freq_HAL': '3857999', 'SUFF2_length': '2', 'SUFF2_P': '4.48E-05', 'SUFF2_P*': '0.02'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(target)}', 
'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '36927'}, None, {'POS': 'VB', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ate>}', 'PREF1_PFMF': '23.15', 'PREF1_FamSize': '96', 'PREF1_Freq_HAL': '397964', 'PREF1_length': '1', 'PREF1_P': '1.76E-05', 'PREF1_P*': '1.08E-03', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '13255', 'SUFF1_PFMF': '27.02', 'SUFF1_FamSize': '937', 'SUFF1_Freq_HAL': '2569118', 'SUFF1_length': '3', 'SUFF1_P': '2.65E-05', 'SUFF1_P*': '0.01'}, None, None, None, None, {'POS': 'RB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(always)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '200870'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(mark)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '29', 'ROOT1_Freq_HAL': '181577'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(experience)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '4', 'ROOT1_Freq_HAL': '297837'}, None, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(discover)}>y>', 'ROOT1_PFMF': '25', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '44840', 'SUFF1_PFMF': '2.45', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(fire)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '50', 'ROOT1_Freq_HAL': '118119'}, None, None, {'POS': 'NN', 'Nmorph': '3', 'PRS_signature': '"1,1,1"', 'MorphoLexSegm': '{ion>', 'PREF1_PFMF': '3.64', 'PREF1_FamSize': '358', 'PREF1_Freq_HAL': '2402388', 'PREF1_length': '2', 'PREF1_P': '6.66E-06', 'PREF1_P*': '2.47E-03', 'ROOT1_PFMF': '16.66', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '88079', 'SUFF1_PFMF': 
'2.56', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None, None, None, {'POS': 'VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(give)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '455818'}, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(history)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '13', 'ROOT1_Freq_HAL': '124154', 'SUFF1_PFMF': '0.44', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(human)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '18', 'ROOT1_Freq_HAL': '222937'}, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(race)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '23', 'ROOT1_Freq_HAL': '111227'}, None, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(limit)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '12', 'ROOT1_Freq_HAL': '155817'}, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(number)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '364121'}, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(problem)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '5', 'ROOT1_Freq_HAL': '510114'}, None, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(old)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '281185'}, None, None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(tackle)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '3482'}, None, 
None, None, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(need)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '11', 'ROOT1_Freq_HAL': '591543'}, None, {'POS': 'NN|VB', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(retreat)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '4397'}, None, None, None, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(attitude)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '2', 'ROOT1_Freq_HAL': '26046'}, None, {'POS': 'JJ|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(new)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '46', 'ROOT1_Freq_HAL': '973761'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(thing)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '9', 'ROOT1_Freq_HAL': '1328712'}, None, None, {'POS': 'RB|minor', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(rather)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '146049'}, {'POS': 'VB|NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(embrace)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '5662'}, None, {'POS': 'JJ|NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(hope)}>ful>', 'ROOT1_PFMF': '50', 'ROOT1_FamSize': '7', 'ROOT1_Freq_HAL': '208147', 'SUFF1_PFMF': '10.23', 'SUFF1_FamSize': '343', 'SUFF1_Freq_HAL': '429561', 'SUFF1_length': '3', 'SUFF1_P': '5.82E-05', 'SUFF1_P*': '3.86E-03'}, {'POS': 'NN', 'Nmorph': '1', 'PRS_signature': '"0,1,0"', 'MorphoLexSegm': '{(posture)}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '1', 'ROOT1_Freq_HAL': '2967'}, None, None, {'POS': 'NN', 'Nmorph': '2', 'PRS_signature': '"0,1,1"', 'MorphoLexSegm': '{(possible)}>ity>', 'ROOT1_PFMF': '40', 'ROOT1_FamSize': '6', 'ROOT1_Freq_HAL': '259443', 'SUFF1_PFMF': '2.24', 'SUFF1_FamSize': '580', 'SUFF1_Freq_HAL': '1647588', 'SUFF1_length': '3', 'SUFF1_P': '1.76E-05', 'SUFF1_P*': '4.48E-03'}, None, {'POS': 'NN', 
'Nmorph': '3', 'PRS_signature': '"0,2,1"', 'MorphoLexSegm': '{(tech)(log)>y>}', 'ROOT1_PFMF': '0', 'ROOT1_FamSize': '20', 'ROOT1_Freq_HAL': '322084', 'ROOT2_PFMF': '0', 'ROOT2_FamSize': '215', 'ROOT2_Freq_HAL': '380513', 'SUFF1_PFMF': '0.16', 'SUFF1_FamSize': '2486', 'SUFF1_Freq_HAL': '3870233', 'SUFF1_length': '1', 'SUFF1_P': '4.99E-05', 'SUFF1_P*': '0.02'}, {'POS': 'VB|NN', 'Nmorph': '2', 'PRS_signature': '"1,1,0"', 'MorphoLexSegm': '{ion>', 'ROOT1_PFMF': '14.28', 'ROOT1_FamSize': '8', 'ROOT1_Freq_HAL': '54696', 'SUFF1_PFMF': '9.32', 'SUFF1_FamSize': '1599', 'SUFF1_Freq_HAL': '6530204', 'SUFF1_length': '3', 'SUFF1_P': '9.03E-06', 'SUFF1_P*': '9.11E-03'}, None] - self.assertEqual(doc._.morpholex,morpholex) - - def test_morpholexsegm(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - - morpholexsegm = [None, '{(state)}>ment>', '{(link)}', '{(tech)(log)>y>}', '{(neg)>ate>}>ive>>ly>', None, '{(free)}', '{(think)}', '{(play)}', None, '{(recent)}', '{(human)}', '{(experience)}', None, None, '{(past)}', '{(century)}', None, '{(sure)}>ly>', None, None, None, None, '{(time)}', None, '{(history)>y>}', None, None, '{(live)}', '{(live)}', None, '{(people)}', None, '{(change)}', None, '{(drama)}>ic>>ly>', None, None, '{(quick)}', '{ion>', None, None, '{(typo)}>al>', '{(day)}', '{(reveal)}', None, '{(tech)(log)>y>}', None, '{ion>}>ize>', None, '{(world)}', None, None, '{(people)}', '{al>}', '{(combust)}>ion>', '{(engine)}', None, None, None, '{(work)}{(day)}', None, '{(chance)}', None, '{(high)}', None, None, '{(employ)}>ee>', None, 'er>', None, '{ion>', None, '{(silic)>on>}', '{(bridge)}', None, None, None, '{(micro)>on>}', '{(wide)}', None, '{(upon)}', '{(leave)}', '{(home)}', None, '{(family)}', '{(member)}', None, None, '{(reach)}', None, '{(wire)}>less>', '{(net)}{(work)}', None, '{(util)>ize>}', '{(satellite)}', '{(orbit)}', None, '{(earth)}', None, None, None, None, '{(common)}', '{(occur)}>ance>', '{(could)}', None, None, 'able>', None, None, '{(turn)}', 
None, None, None, '{(century)}', None, None, None, '{(state)}>ment>', '{(attempt)}', None, '{(bridge)}', None, '{(drama)}>ic>', '{(change)}', None, None, '{ion>}', None, None, '{(able)}>ity>', None, '{(human)}', None, '{(think)}', None, None, None, None, '{(assumption)}', None, None, None, '{ance>', None, '{(tech)(log)>y>}', '{(neg)>ate>}', None, '{(need)}', None, '{(people)}', None, '{(think)}', '{(create)}>ive>>ly>', None, '{(solve)}', '{(previous)}', '{(quandary)}', None, '{(look)}', '{(back)}', None, None, '{ion>}', None, '{(one)}', '{(could)}', '{(argue)}', None, '{(with)}{(out)}', None, '{(car)}', None, '{(compute)}>er>', None, None, '{(mobile)}', '{(phon)}', None, None, '{al>', '{(work)}>er>', '{(would)}', '{(need)}', None, '{(find)}', '{(altern)>ate>)}', '{(method)}', None, '{ion>', '{ate>}>ion>', None, '{(tech)(log)>y>}', '{(short)}', '{(circuit)}', None, '{(think)}', None, '{(make)}', None, '{(problem)}', '{(obsolete)}', None, None, '{(how)}{(ever)}', None, None, '{(rely)}>ance>', None, '{(tech)(log)>y>}', None, None, '{(necess)>ory>}>ly>', '{ive>>ity>', None, '{(mark)}', None, '{(human)}', '{(species)}', None, None, '{(prior)}', '{(example)}', '{(reveal)}', None, '{(tech)(log)>y>}', '{(allow)}', None, '{(convenience)}', None, None, '{(car)}', None, '{(compute)}>er>', None, '{(phon)}', None, '{(release)}', '{(add)}>ion>>al>', '{(time)}', None, '{(people)}', None, '{(live)}', None, '{(efficient)}>ly>', None, None, '{(efficiency)}', None, None, '{y>}', '{(free)}', '{(human)}>ity>', None, None, None, '{(tackle)}', '{(new)}', '{(problem)}', None, None, '{(may)}', None, '{(create)}', '{(new)}', '{(issue)}', None, None, None, '{(exist)}', '{(with)}{(out)}', '{(tech)(log)>y>}', None, None, '{(example)}', None, None, '{(proliferate)}>ion>', None, '{ion>', None, None, '{(globe)}>al>', '{(scale)}', None, None, '{y>}', '{al>', '{(warm)}', '{(become)}', None, '{(concern)}', 'able>', None, None, '{(horse)}', None, None, None, '{(buggy)}', '{(gener)>ate>}>ion>', None, 
'{(like)}>wise>', '{ance>', None, '{(oil)}', None, '{(create)}', '{(nation)}', None, '{(state)}', None, None, None, '{ant>', None, '{(tax)}>ion>', None, '{(allow)}', '{(rule)}', '{(party)}', None, '{ity>', '{(group)}', None, None, '{(women)}', None, '{(solut)}>ion>', None, None, '{ion>', None, '{(maverick)}', '{(sci)>ant>>ist>}', None, '{(politic)}>ian>', None, None, None, '{(contrast)}', None, None, '{(state)}>ment>', None, None, None, '{(even)}', '{(see)}', None, '{(tech)(log)>y>}', '{(free)}', None, '{(human)}', '{(imagine)}>ion>', None, '{(consider)}', None, None, '{(digit)}>al>', '{ion>}', None, None, '{(advent)}', None, None, None, None, '{(allow)}', None, None, 'ar>}', None, '{(portal)}', None, '{(medic)>al>}', '{(inform)}>ion>', None, '{(permit)}', '{(patient)}', None, '{(self)}', '{ly>', '{(close)}', None, None, None, '{(medic)>al>}', '{(lay)}(man)', None, None, '{ory>', 'ion>', None, '{ion>', None, '{(arrive)}', None, None, None, '{(surprise)}', '{(corner)}', None, None, None, None, '{(one)}', None, None, '{(architect)}', None, None, None, '{(mill)>en>>ium>}', '{(develop)}>ment>', '{(goal)}', None, '{(base)}', None, '{(idea)}', None, '{(emergency)}', '{(care)}', None, '{(technique)}', None, None, 'ly>', '{(marriage)}', None, '{(econom)>ics>}', None, '{(medicine)}', None, '{(heal)}', '{(tense)}', None, None, '{(environ)}>ment>', None, '{(south)}', '{(america)}', None, '{(east)}>ern>', '{(europe)}', None, None, None, '{(last)}', '{(example)}', '{y>}', '{(act)>al>}>y>', '{ity>', None, None, '{ance>', None, '{(tech)(log)>y>}', None, 'ate>}>ion>', None, '{(small)}{(pox)}', None, None, '{(disease)}', None, '{(ravage)}', None, '{(human)}', '{(race)}', '{(since)}', 'ic>}>al>', '{(day)}', None, None, '{(yet)}', None, None, '{(tech)(log)>y>}', None, '{(vaccine)}', None, '{(free)}', '{(think)}', '{(human)}', '{(dare)}', None, '{(imagine)}', None, '{(world)}', '{(free)}', None, '{(small)}{(pox)}', None, '{(use)}', '{(tech)(log)>y>}', None, '{(battle)}', '{(plan)}', 
None, '{(draw)}>n>', None, None, None, '{(small)}{(pox)}', None, '{(system)}>ic>>ly>', None, None, '{ate>}', None, None, '{(tech)(log)>y>}', None, '{(always)}', '{(mark)}', None, '{(human)}', '{(experience)}', None, None, None, '{(discover)}>y>', None, '{(fire)}', None, None, '{ion>', None, None, None, '{(give)}>en>', None, '{(history)>y>}', None, None, '{(human)}', '{(race)}', None, None, None, None, None, '{(limit)}', None, None, '{(number)}', None, '{(problem)}', None, None, '{(new)}', None, '{(old)}', None, None, None, None, '{(tackle)}', None, None, None, None, '{(need)}', None, '{(retreat)}', None, None, '{(ludd)}>ite>', '{(attitude)}', None, '{(new)}', '{(thing)}', None, None, '{(rather)}', '{(embrace)}', None, '{(hope)}>ful>', '{(posture)}', None, None, '{(possible)}>ity>', None, '{(tech)(log)>y>}', '{ion>', None] - self.assertEqual(doc._.morpholexSegm,morpholexsegm) - - def test_morphnums(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - morphnums = [None, '2', '1', '3', '4', None, '1', '1', '1', None, '1', '1', '1', None, None, '1', '1', None, '2', None, None, None, None, '1', None, '2', None, None, '1', '1', None, '1', None, '1', None, '3', None, None, '1', '3', None, None, '2', '1', '1', None, '3', None, '4', None, '1', None, None, '1', '2', None, '1', None, None, '2', None, '1', None, None, '3', '2', '1', None, None, None, '2', None, '1', None, '1', None, None, '2', None, '2', None, None, '2', None, '2', '2', None, '2', '1', None, None, None, '2', '1', None, '1', '1', '1', None, '1', '1', None, None, '1', None, '2', '2', None, '2', '1', '1', None, '1', None, None, None, None, '1', '2', '1', None, None, '4', None, None, '1', None, None, None, '1', None, None, None, '2', '1', None, '1', None, '2', '1', None, None, '3', None, None, '2', None, '1', None, '1', None, None, None, None, '1', None, None, None, '2', '2', None, '3', '2', None, '1', None, '1', None, '1', '3', None, '1', '1', '1', None, '1', '1', None, None, '3', None, '1', '1', '1', 
None, '2', None, '1', None, '2', None, None, '1', '1', None, None, '3', '2', '1', '1', None, '1', '2', '1', None, '2', None, '2', '2', None, '3', None, '3', '1', '1', None, '1', None, '1', None, '1', '1', None, None, '2', None, None, '2', None, '3', None, None, '3', '2', None, '3', None, '1', None, '1', '1', None, None, '1', '1', '1', None, '3', '1', None, '1', None, None, '1', None, '2', None, '1', None, '1', '3', '1', None, '1', None, '1', None, '2', None, None, '1', None, None, '2', None, '1', None, '1', None, '1', None, None, None, None, '1', None, '3', '1', '2', None, None, None, '1', '1', '1', None, None, '1', None, '1', '1', '1', None, None, None, '1', '2', '3', None, None, '1', None, None, '2', None, '2', None, '2', None, '1', None, '1', '3', None, None, '2', '1', None, None, '2', '2', '2', None, '2', '1', None, '2', '1', '1', None, '1', '4', None, None, '1', None, None, None, '1', '3', None, '2', '3', None, '1', None, '1', '1', None, '1', None, None, None, '3', None, '2', None, '1', '1', '1', None, '2', '2', '1', None, None, '1', None, '2', None, None, '2', '1', '2', None, '2', '2', None, '1', '3', None, '2', None, None, None, '1', None, None, '2', None, None, None, '1', '1', None, '3', '1', None, '1', '2', None, '1', None, None, '2', '3', None, None, '1', None, None, None, None, '1', None, None, '2', '2', None, '1', None, None, None, None, '2', None, '1', None, '2', '2', None, '1', '1', None, '1', '2', '1', None, None, None, '1', '1', '1', None, None, '1', '1', '2', None, '1', None, None, '2', '1', None, None, None, '2', '2', None, None, '2', '3', '3', None, '3', None, '1', None, None, None, '1', '1', None, None, None, None, '1', None, None, '1', None, None, None, '3', '2', '1', None, '1', None, '1', None, '1', '1', None, '1', None, None, '3', '1', None, '2', None, '1', None, '1', '1', None, None, '2', None, '1', '1', None, '2', '1', None, None, None, '1', '1', '2', None, None, '1', None, None, '3', '3', '2', '1', None, None, '1', None, '2', None, None, 
'2', None, '2', None, '3', None, '2', '1', None, None, None, '1', None, '1', None, None, '1', None, '1', '1', None, '1', '4', None, '2', None, None, '1', None, '1', None, '1', '1', '1', '4', '1', None, None, '1', None, None, '3', None, '1', None, '1', '1', '1', '1', None, '1', None, '1', '1', None, '2', None, '1', '3', None, '1', '1', None, '2', None, None, None, '2', None, '3', None, None, '3', None, None, '3', None, '1', '1', None, '1', '1', None, None, None, '2', None, '1', None, None, '3', None, None, None, '2', None, '2', None, None, '1', '1', None, None, None, None, None, '1', None, None, '1', None, '1', None, None, '1', None, '1', None, None, None, None, '1', None, None, None, None, '1', None, '1', None, None, '2', '1', None, '1', '1', None, None, '1', '1', None, '2', '1', None, None, '2', None, '3', '2', None, '1', '1', None, '1', '2', None] - self.assertEqual(doc._.morphnums,morphnums) - - def test_mean_nMorph(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - mean_nMorph = 1.5765765765765767 - self.assertEqual(doc._.mean_nMorph,mean_nMorph) - - def test_med_nMorph(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_nMorph,1.0) - - def test_max_nMorph(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_nMorph,4.0) - - def test_min_nMorph(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_nMorph,1.0) - - def test_std_nMorph(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_nMorph,0.7747858182417163) - - def test_root_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - root_freqs_HAL = [None, 451541, 82099, 322084, 29916, None, 323757, 746094, 458704, None, 134614, 222937, 297837, None, None, 90058, 39167, None, 362162, None, None, None, None, 1099121, None, 124154, None, None, 291215, 240644, None, 798705, None, 348247, None, 16813, None, None, 92596, 38887, 
None, None, 282037, 778343, 32067, None, 322084, None, 158531, None, 345235, None, None, 798705, 9931, None, 1051110, None, None, 19217, None, 434649, None, None, 84483, 1982, 194206, None, None, None, 778343, None, 72023, None, 339513, None, None, 116155, None, 716577, None, None, 344318, None, 220761, 510787, None, 12811, 26961, None, None, None, 2788, 118804, None, 78918, 122742, 216780, None, 145491, 163906, None, None, 76970, None, 37645, 297666, None, 57081, 19083, 11239, None, 77215, None, None, None, None, 142732, 53283, 610350, None, None, 181726, None, None, 195606, None, None, None, 39167, None, None, None, 451541, 78959, None, 26961, None, 16813, 348247, None, None, 559015, None, None, 328113, None, 222937, None, 746094, None, None, None, None, 17588, None, None, None, 115343, 46378, None, 322084, 29916, None, 591543, None, 798705, None, 746094, 227932, None, 67167, 87239, 184, None, 628585, 549038, None, None, 559015, None, 2327675, 610350, 113797, None, 1415807, None, 121302, None, 344318, None, None, 19217, 207292, None, None, 7461, 1051110, 1366583, 591543, None, 435097, 60362, 92974, None, 395451, None, 510787, 220761, None, 144552, None, 322084, 134285, 24657, None, 746094, None, 1073485, None, 510114, 4914, None, None, 564255, None, None, 46378, None, 322084, None, None, 107363, 506137, None, 227932, None, 181577, None, 222937, 249, None, None, 43904, 171361, 32067, None, 322084, 185956, None, 8009, None, None, 121302, None, 344318, None, 207292, None, 144416, 332926, 1099121, None, 798705, None, 291215, None, 19176, None, None, 8343, None, None, 506137, None, 591543, None, 222937, None, 746094, None, None, None, None, 364953, None, 322084, 323757, 222937, None, None, None, 3482, 973761, 510114, None, None, 538146, None, 227932, 973761, 221311, None, None, None, 141837, 1415807, 322084, None, None, 171361, None, None, 1921, None, 19217, None, 559015, None, 591543, None, 19795, 651056, None, None, 38772, 32879, None, None, 115343, 63237, 56361, 
None, 23748, 173720, None, 38772, 29760, 143379, None, 87873, 181726, None, None, 39630, None, None, None, 4547, 318254, None, 1232275, 169032, None, 39902, None, 227932, 303858, None, 451541, None, None, None, 169032, None, 112326, None, 185956, 149114, 91368, None, 340039, 48418, 401691, None, None, 74275, None, 83940, None, None, 38097, 510114, 311285, None, 624, 54696, None, 1631, 204888, None, 92034, None, None, None, 10700, None, None, 451541, None, None, None, 494850, 800097, None, 322084, 323757, None, 222937, 54696, None, 214085, None, None, 53861, 158531, None, None, 28013, None, None, None, None, 185956, None, None, 5478, 348247, None, 203042, None, None, None, None, 93743, None, 2394, None, 65461, 510787, None, 48680, 32789, None, 458870, 254138, 12221, None, None, None, 510787, 49260, 73303, None, None, 28283, 233035, 81153, None, 746094, None, None, 87239, 188225, None, None, None, 65461, 55753, None, None, 115343, 13680, 716577, None, 20108, None, 37242, None, None, None, 52522, 32765, None, None, None, None, 2327675, None, None, 30688, None, None, None, None, None, None, None, 341952, None, 203042, None, 14348, 174627, None, 44405, None, None, 1232275, 27179, None, 84011, None, 21940, None, 19345, 11367, None, None, 105667, None, None, None, None, None, None, None, None, None, 306761, 171361, 282157, None, None, 208147, None, None, 322084, 716577, 282157, 208147, None, None, 95757, None, 222937, None, None, 115343, None, 46378, None, 322084, None, 259443, 42643, None, None, None, 34271, None, 214085, None, None, 230285, None, 39167, 18126, None, 197347, 33227, None, 1740, None, None, 30119, None, 1089, None, 222937, 111227, 320454, 124154, 778343, None, None, 159994, None, None, 322084, None, 13304, None, 323757, 746094, 222937, 15225, None, 54696, None, 345235, 323757, None, 1740, None, 1414857, 322084, None, 40439, 145043, None, 76896, None, None, None, 1740, None, 688810, 36927, None, 13255, None, None, None, None, 200870, 181577, None, 222937, 
297837, None, None, None, 44840, None, 118119, None, None, 88079, None, None, None, 455818, None, 124154, None, None, 222937, 111227, None, None, None, None, None, 155817, None, None, 364121, None, 510114, None, None, 973761, None, 281185, None, None, None, None, 3482, None, None, None, None, 591543, None, 4397, None, None, None, 26046, None, 973761, 1328712, None, None, 146049, 5662, None, 208147, 2967, None, None, 259443, None, 322084, 282157, None, 973761, 21313, None, 222937, 54696, None] - self.assertEqual(doc._.root_freqs_HAL, root_freqs_HAL) - - def test_mean_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - mean_freqs_HAL = 275457.9043478261 - self.assertEqual(doc._.mean_freq_HAL, mean_freqs_HAL) - - def test_med_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_freq_HAL,181577) - - def test_max_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_freq_HAL,2327675) - - def test_min_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_freq_HAL,184) - - def test_std_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_freq_HAL,330014.93612255837) - - def test_log_root_freqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - log_root_freqs_HAL = [None, 13.020421456132732, 11.315681115098373, 12.682567660071214, 10.306148733311558, None, 12.687748513383763, 13.522606876609819, 13.036160400843732, None, 11.810166702670976, 12.314644499317684, 12.60430163597371, None, None, 11.408209186191696, 10.575589834500324, None, 12.79984690451562, None, None, None, None, 13.91002132741618, None, 11.7292780095071, None, None, 12.581817104967033, 12.391073941945889, None, 13.590746945058667, None, 12.76066727710387, None, 9.729907675678929, None, None, 11.436001223156996, 10.568415283521176, None, None, 12.549793546987297, 
13.564922580040115, 10.375582742942056, None, 12.682567660071214, None, 11.97370543677171, None, 12.751980623550002, None, None, 13.590746945058667, 9.203416456903359, None, 13.86535730660883, None, None, 9.863550582932428, None, 12.982294087802849, None, None, 11.344305609673212, 7.591861714889934, 12.176674730345614, None, None, None, 13.564922580040115, None, 11.184740791431123, None, 12.735267516853085, None, None, 11.662680785055064, None, 13.482240987328971, None, None, 12.749320927948446, None, 12.304835947349302, 13.143707952542577, None, 9.458059455856434, 10.202146656326484, None, None, None, 7.933079771880415, 11.685230355377529, None, 11.276164617696834, 11.717839870416817, 12.286638293369741, None, 11.887869508010441, 12.007048371749491, None, None, 11.251171014528364, None, 10.535955422284223, 12.603727331546361, None, 10.952226590708511, 9.856553165346915, 9.327145151518268, None, 11.254349017657102, None, None, None, None, 11.86872402499674, 10.883372609935437, 13.321787842097946, None, None, 12.110255337157891, None, None, 12.183857710959016, None, None, None, 10.575589834500324, None, None, None, 13.020421456132732, 11.276684009386841, None, 10.202146656326484, None, 9.729907675678929, 12.76066727710387, None, None, 13.233931585408705, None, None, 12.701113340230703, None, 12.314644499317684, None, 13.522606876609819, None, None, None, None, 9.7749721303007, None, None, None, 11.65566557688837, 10.744580487844381, None, 12.682567660071214, 10.306148733311558, None, 13.290489656292548, None, 13.590746945058667, None, 13.522606876609819, 12.336802617838474, None, 11.114937334467607, 11.37640675761805, 5.214935757608986, None, 13.351226540509186, 13.21592293485404, None, None, 13.233931585408705, None, 14.66038047327681, 13.321787842097946, 11.64217143828822, None, 14.163210244511161, None, 11.706038582842382, None, 12.749320927948446, None, None, 9.863550582932428, 12.241883706265792, None, None, 8.917444732471514, 13.86535730660883, 
14.12782402162024, 13.290489656292548, None, 12.983324273718326, 11.008115046859025, 11.440075163155973, None, 12.887782164709064, None, 13.143707952542577, 12.304835947349302, None, 11.881394583391666, None, 12.682567660071214, 11.80771968602779, 10.11281611481661, None, 13.522606876609819, None, 13.886420923217788, None, 13.142389509133297, 8.499843553081124, None, None, 13.243261555960858, None, None, 10.744580487844381, None, 12.682567660071214, None, None, 11.583970895183207, 13.13456266261131, None, 12.336802617838474, None, 12.109435085145911, None, 12.314644499317684, 5.517452896464707, None, None, 10.689760711085556, 12.051527721332148, 10.375582742942056, None, 12.682567660071214, 12.133265365571027, None, 8.988321188323683, None, None, 11.706038582842382, None, 12.749320927948446, None, 12.241883706265792, None, 11.880453302626728, 12.715675522045341, 13.91002132741618, None, 13.590746945058667, None, 12.581817104967033, None, 9.86141477611422, None, None, 9.029178142902074, None, None, 13.13456266261131, None, 13.290489656292548, None, 12.314644499317684, None, 13.522606876609819, None, None, None, None, 12.807523857150143, None, 12.682567660071214, 12.687748513383763, 12.314644499317684, None, None, None, 8.155362120328135, 13.788921172637307, 13.142389509133297, None, None, 13.195885177792945, None, 12.336802617838474, 13.788921172637307, 12.307324231084891, None, None, None, 11.862433789932195, 14.163210244511161, 12.682567660071214, None, None, 12.051527721332148, None, None, 7.560601162768557, None, 9.863550582932428, None, 13.233931585408705, None, 13.290489656292548, None, 9.89318465954023, 13.386350938996445, None, None, 10.5654536156125, 10.400589435082308, None, None, 11.65566557688837, 11.054644851868005, 10.939532708935408, None, 10.075253595390565, 12.065200086648758, None, 10.5654536156125, 10.300920488947028, 11.873246752909115, None, 11.383647869252256, 12.110255337157891, None, None, 10.58734168618448, None, None, None, 
8.42222295382501, 12.670605085075938, None, 14.024372612452714, 12.037843325093355, None, 10.594181726935007, None, 12.336802617838474, 12.624315765997553, None, 13.020421456132732, None, None, None, 12.037843325093355, None, 11.629160636636323, None, 12.133265365571027, 11.912466393058425, 11.422650586730443, None, 12.73681559589648, 10.787626924393198, 12.903438415316696, None, None, 11.215529700330809, None, 11.337857536887581, None, None, 10.547890817857143, 13.142389509133297, 12.648464170238247, None, 6.436150368369428, 10.90954585959363, None, 7.396948602621014, 12.230218767358252, None, 11.429913352976165, None, None, None, 9.277999020449997, None, None, 13.020421456132732, None, None, None, 13.112009965324837, 13.592488249299878, None, 12.682567660071214, 12.687748513383763, None, 12.314644499317684, 10.90954585959363, None, 12.274128411404117, None, None, 10.89416193284752, 11.97370543677171, None, None, 10.240423967124363, None, None, None, None, 12.133265365571027, None, None, 8.608495349823023, 12.76066727710387, None, 12.221168133175508, None, None, None, None, 11.448312274378296, None, 7.780720886117918, None, 11.089209824443223, 13.143707952542577, None, 10.793023547101463, 10.397848372270873, None, 13.036522224526205, 12.445632705549428, 9.410911062438, None, None, None, 13.143707952542577, 10.804867671674529, 11.202356814731635, None, None, 10.250016196420983, 12.35894393585903, 11.304091540836126, None, 13.522606876609819, None, None, 11.37640675761805, 12.145393334718323, None, None, None, 11.089209824443223, 10.928686499533763, None, None, 11.65566557688837, 9.523690191176541, 13.482240987328971, None, 9.908873024812467, None, 10.525192435641708, None, None, None, 10.86898740842283, 10.397116151473597, None, None, None, None, 14.66038047327681, None, None, 10.331626977683165, None, None, None, None, None, None, None, 12.742425655317444, None, 12.221168133175508, None, 9.571365838652174, 12.070407549607491, None, 10.701107354692605, None, None, 
14.024372612452714, 10.210199895269655, None, 11.338703021632888, None, 9.99606673382922, None, 9.870189267134613, 9.33846969968671, None, None, 11.568047918760465, None, None, None, None, None, None, None, None, None, 12.633824221750912, 12.051527721332148, 12.55021893258803, None, None, 12.245999839836097, None, None, 12.682567660071214, 13.482240987328971, 12.55021893258803, 12.245999839836097, None, None, 11.46956901142031, None, 12.314644499317684, None, None, 11.65566557688837, None, 10.744580487844381, None, 12.682567660071214, None, 12.466292304281597, 10.660618412837621, None, None, None, 10.442054794483777, None, 12.274128411404117, None, None, 12.347072951251615, None, 10.575589834500324, 9.805102650614726, None, 12.19271887955665, 10.411118077405577, None, 7.461640392208575, None, None, 10.312911480831472, None, 6.9930151229329605, None, 12.314644499317684, 11.619328437081467, 12.677494019301028, 11.7292780095071, 13.564922580040115, None, None, 11.982891593512822, None, None, 12.682567660071214, None, 9.495820020872763, None, 12.687748513383763, 13.522606876609819, 12.314644499317684, 9.630694092578098, None, 10.90954585959363, None, 12.751980623550002, 12.687748513383763, None, 7.461640392208575, None, 14.162539023881891, 12.682567660071214, None, 10.60754994483658, 11.884785529164079, None, 11.250209138536196, None, None, None, 7.461640392208575, None, 13.442720749992596, 10.516698269778406, None, 9.492130118723125, None, None, None, None, 12.210413211628595, 12.109435085145911, None, 12.314644499317684, 12.60430163597371, None, None, None, 10.710855877186097, None, 11.67944786985521, None, None, 11.385989418016944, None, None, None, 13.029848886018799, None, 11.7292780095071, None, None, 12.314644499317684, 11.619328437081467, None, None, None, None, None, 11.956437520715296, None, None, 12.805241508962812, None, 13.142389509133297, None, None, 13.788921172637307, None, 12.546768094679367, None, None, None, None, 8.155362120328135, None, None, None, 
None, 13.290489656292548, None, 8.388677769180811, None, None, None, 10.167619484527648, None, 13.788921172637307, 14.099720609922676, None, None, 11.89169746082223, 8.641532465671846, None, 12.245999839836097, 7.995306620290822, None, None, 12.466292304281597, None, 12.682567660071214, 12.55021893258803, None, 13.788921172637307, 9.967072494161238, None, 12.314644499317684, 10.90954585959363, None] - self.assertEqual(doc._.log_root_freqs_HAL,log_root_freqs_HAL) - - - def test_mean_logfreqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - mean_freqs_HAL = 11.702238824461727 - self.assertEqual(doc._.mean_logfreq_HAL, mean_freqs_HAL) - - def test_med_logfreqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_logfreq_HAL,12.109435085145911) - - def test_max_logfreqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_logfreq_HAL,14.66038047327681) - - def test_min_logfreqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_logfreq_HAL,5.214935757608986) - - def test_std_logfreqs_HAL(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_logfreq_HAL,1.5929041182131862) - - def test_root_pfmfs(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - root_pfmfs = [None, '3.44', '0', '0', '50', None, '0', '0', '0', None, '50', '0', '0', None, None, '0', '0', None, '13.04', None, None, None, None, '0', None, '0', None, None, '0', '0', None, '0', None, '0', None, '25', None, None, '9.09', '7.69', None, None, '2.85', '0', '0', None, '0', None, '58.82', None, '0', None, None, '0', '0', None, '0', None, None, '9.09', None, '0', None, None, '0', '0', '100', None, None, None, '38.82', None, '0', None, '0', None, None, '12.5', None, '24.13', None, None, '0', None, '0', '0', None, '0', '0', None, None, None, '0', '0', None, '0', '0', '0', None, '0', '0', None, None, '0', None, '8.33', 
'0', None, '20', '0', '0', None, '0', None, None, None, None, '0', '100', '0', None, None, '42.1', None, None, '0', None, None, None, '0', None, None, None, '3.44', '0', None, '0', None, '0', '0', None, None, '10.6', None, None, '10', None, '0', None, '0', None, None, None, None, '0', None, None, None, '0', '71.42', None, '0', '25', None, '0', None, '0', None, '0', '63.63', None, '0', '0', '0', None, '0', '0', None, None, '7.57', None, '0', '0', '16.66', None, '6.66', None, '0', None, '0', None, None, '0', '0', None, None, '16.66', '2.35', '0', '0', None, '0', '16.66', '0', None, '10.25', None, '0', '0', None, '0', None, '0', '0', '0', None, '0', None, '0', None, '0', '0', None, None, '20', None, None, '71.42', None, '0', None, None, '14.28', '44.44', None, '36.36', None, '0', None, '0', '0', None, None, '0', '0', '0', None, '0', '0', None, '0', None, None, '0', None, '0', None, '0', None, '0', '50', '0', None, '0', None, '0', None, '33.33', None, None, '0', None, None, '44.44', None, '0', None, '0', None, '0', None, None, None, None, '0', None, '0', '0', '5.88', None, None, None, '0', '0', '0', None, None, '0', None, '0', '0', '0', None, None, None, '0', '6.66', '0', None, None, '0', None, None, '0', None, '9.09', None, '4.54', None, '0', None, '0', '16.66', None, None, '0', '0', None, None, '0', '0', '0', None, '0', '0', None, '0', '0', '0', None, '0', '42.1', None, None, '0', None, None, None, '0', '15.78', None, '6.84', '33.33', None, '0', None, '0', '9.09', None, '0', None, None, None, '6.06', None, '33.33', None, '0', '0', '0', None, '18.05', '100', '0', None, None, '0', None, '0', None, None, '0', '0', '0', None, '0', '14.28', None, '0', '16.66', None, '10', None, None, None, '0', None, None, '3.44', None, None, None, '0', '0', None, '0', '0', None, '0', '14.28', None, '0', None, None, '0', '11.76', None, None, '9.09', None, None, None, None, '0', None, None, '50', '6.25', None, '0', None, None, None, None, '0', None, '0', None, '0', '0', None, '14.28', '0', 
None, '22.72', '0', '0', None, None, None, '11.11', '0', '0', None, None, '0', '0', '10.41', None, '0', None, None, '100', '0', None, None, None, '0', '25', None, None, '0', '20', '17.24', None, '6.25', None, '0', None, None, None, '0', '0', None, None, None, None, '0', None, None, '25', None, None, None, None, None, None, None, '0', None, '0', None, '0', '0', None, '0', None, None, '4.1', '0', None, '0', None, '0', None, '0', '14.28', None, None, '0', None, None, None, None, None, None, None, None, None, '0', '0', '0', None, None, '0', None, None, '0', '0', '0', '0', None, None, '0', None, '5.88', None, None, '0', None, '71.42', None, '0', None, '60', '0', None, None, None, '0', None, '0', None, None, '14.28', None, '0', '0', None, '0', '66.66', None, '50', None, None, '0', None, '0', None, '0', '0', '0', '83.33', '0', None, None, '0', None, None, '0', None, '0', None, '0', '0', '0', '0', None, '0', None, '0', '0', None, '50', None, '0', '0', None, '0', '0', None, '0', None, None, None, '50', None, '50', '0', None, '25', None, None, None, None, '0', '0', None, '0', '0', None, None, None, '25', None, '0', None, None, '16.66', None, None, None, '0', None, '0', None, None, '0', '0', None, None, None, None, None, '0', None, None, '0', None, '0', None, None, '0', None, '0', None, None, None, None, '0', None, None, None, None, '0', None, '0', None, None, None, '0', None, '0', '0', None, None, '0', '0', None, '50', '0', None, None, '40', None, '0', '0', None, '0', '0', None, '0', '14.28', None] - self.assertEqual(doc._.root_pfmfs,root_pfmfs) - - def test_mean_root_pfmf(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_root_pfmf,7.700984615384615) - - def test_med_root_pfmf(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_root_pfmf,0.0) - - def test_max_root_pfmf(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_root_pfmf,100.0) - - def 
test_min_root_pfmf(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_root_pfmf,0.0) - - def test_std_root_pfmf(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_root_pfmf,18.250995193981897) - - def test_token_freqs(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - token_freqs = [7.73, 4.97, 3.96, 5.09, 3.69, 6.85, 5.63, 5.24, 4.84, 6.91, 5.04, 5.35, 5.27, 6.08, 7.73, 5.36, 5.02, None, 4.55, 6.31, 6.37, 6.27, 6.35, 6.29, 7.27, 5.39, 6.0, 7.73, 4.88, 5.14, 7.4, 6.25, 6.71, 5.07, 6.36, 3.95, None, 7.36, 4.98, 4.17, 6.91, 7.36, 4.59, 5.95, 4.34, 6.24, 5.09, 6.37, 2.86, 7.73, 5.89, None, 6.0, 6.25, 3.56, 7.43, 5.96, 7.27, 6.53, 3.94, 7.01, 4.84, 6.91, 6.53, 4.66, 3.57, 4.84, None, 5.72, 7.73, 2.94, None, 4.51, 6.74, 5.75, 7.01, 7.73, 4.5, 6.45, 4.05, 6.85, 7.36, 4.97, 7.01, 4.46, 5.43, 6.91, 4.0, 4.1, 7.01, 6.74, None, 2.73, 4.96, None, 5.12, 4.98, 5.81, None, 5.66, 5.35, 6.45, 6.79, 4.85, 5.87, 4.2, 4.45, 7.01, 3.75, 3.77, 3.28, 7.73, 5.06, None, 5.69, 7.4, 6.04, 5.2, 3.26, 6.06, 6.71, 6.27, 3.07, 6.7, 7.73, 5.32, 7.4, 7.73, 3.89, 5.02, None, None, 7.73, 4.97, 4.5, 7.43, 4.77, 6.04, 4.41, 5.12, 7.43, 7.36, 4.39, 7.27, 7.73, 4.96, 7.01, 4.61, 7.43, 6.08, 7.01, 5.16, None, 7.73, 4.07, 7.07, 7.01, 6.53, 4.91, 3.81, 6.91, 5.09, 2.56, 7.73, 5.97, 7.01, 6.25, 7.43, 6.08, 3.1, 7.43, 4.42, 5.0, 1.88, None, 5.57, 6.04, 6.7, 7.73, 4.52, None, 6.47, 6.06, 4.46, 7.01, 5.69, 7.36, 5.45, None, 4.97, None, 6.54, 4.85, 5.3, None, 7.73, 3.61, 4.43, 6.27, 5.97, 7.43, 5.76, 4.11, 4.68, 7.4, 4.68, None, 5.43, 4.46, 7.41, 4.68, None, 5.09, 5.33, 3.77, 6.82, 5.24, 6.66, 5.54, 7.73, 5.19, 3.65, None, None, 5.58, None, 6.82, 3.81, 6.91, 5.09, 5.74, 6.69, 4.48, 3.03, 7.73, 4.01, 7.01, 4.49, 7.73, 5.35, 4.85, None, 7.73, 4.85, 4.63, 4.43, 7.01, 5.09, 4.67, 7.01, 4.02, None, 7.73, 5.45, None, 4.97, 7.41, 5.3, 6.52, 5.09, 4.98, 6.29, 7.01, 6.25, 7.43, 5.54, 6.36, 3.79, None, 6.82, 4.38, 5.74, 6.69, 
3.03, 7.73, 5.97, 7.01, 4.61, 7.43, 6.08, 7.01, 5.16, None, 7.27, 5.41, None, 5.09, 3.18, 4.32, 7.43, 6.69, 6.12, 4.15, 6.25, 5.19, None, 6.63, 5.98, 5.1, 5.02, 6.25, 5.19, 7.01, 5.96, 6.69, 4.73, 5.69, 5.09, None, 7.01, 5.27, None, 7.73, 3.61, 7.4, 3.49, 6.37, 4.72, 7.36, 5.97, 7.01, 4.72, 4.33, 6.91, 7.36, 4.95, 4.83, None, 6.85, 4.68, 5.23, 4.41, 6.63, 4.19, 4.61, None, 4.95, 4.05, 4.83, 7.36, 4.66, 3.07, 7.43, 7.73, 4.76, None, 7.41, None, 3.32, 4.82, None, 4.04, 3.66, 6.91, 5.1, 6.37, 5.05, 4.88, None, 5.52, 7.01, 6.74, 6.69, 4.28, 6.91, 3.81, None, 4.62, 4.29, 4.89, 7.43, 2.88, 4.41, 5.12, 5.84, 6.77, 5.57, None, 4.5, 7.43, 6.04, 4.86, 5.19, 4.78, 7.73, 2.89, 3.04, 7.4, 3.33, 4.5, 7.41, 4.54, None, None, 7.27, 4.46, 7.43, 7.73, 4.97, None, 6.54, 6.46, 5.99, 6.1, 6.24, 5.09, 3.18, 7.73, 5.35, 4.22, None, 4.99, 6.24, 7.73, 4.83, 4.56, 7.41, 7.73, 3.67, 7.4, 7.73, 5.06, 6.37, 5.03, 7.01, 6.53, 3.82, 4.85, 7.4, 4.84, None, 2.46, None, 7.36, 5.07, 5.06, 4.0, 7.01, 5.16, 5.43, None, 3.94, 4.91, 7.43, 5.25, 5.39, 4.48, 7.01, 7.36, 6.36, 4.45, 4.9, 5.04, None, 6.82, 4.68, 4.43, 3.64, 7.4, 5.24, 7.01, 6.34, 4.83, 4.9, 5.93, 7.43, 7.73, 5.16, 2.93, None, 6.85, 4.91, 3.39, 4.16, None, 4.41, 6.46, 4.38, 6.63, 7.73, 6.0, 4.28, 4.06, None, 3.9, 3.43, None, 6.47, 7.4, 7.73, 3.91, 7.4, 7.73, 4.62, 2.6, 5.31, 4.95, None, 5.51, 6.51, 4.84, 6.91, 4.72, 5.56, 2.89, 4.51, None, 7.73, 4.31, 4.91, 7.4, 4.42, 7.41, 4.73, 6.37, 3.66, 3.95, None, 2.56, 4.02, 6.63, 5.4, 5.22, 7.43, 4.78, 5.04, None, None, 6.82, 5.86, 5.27, 4.8, 7.73, 6.0, 5.44, 7.27, 6.24, 5.09, 5.49, 4.8, 5.44, 7.43, 7.73, 5.33, 7.4, 4.32, None, 6.66, 4.68, 6.14, 3.81, 6.91, 5.09, None, 4.78, 4.95, 6.46, 6.18, 6.79, 4.49, None, 4.99, 6.24, 7.73, 5.34, 3.65, 5.02, 4.04, 7.73, 5.1, 3.93, 7.4, 3.26, None, 6.82, 4.9, 6.33, 3.17, 7.73, 5.35, 5.08, 5.75, 1.39, 5.61, None, 7.41, 5.54, 6.85, 7.73, 5.09, 7.4, 3.76, None, 5.63, 5.24, 4.61, 3.52, 7.43, 4.92, 7.36, 5.89, 5.63, 7.4, 3.26, None, 5.47, 5.09, None, 4.94, 5.01, 6.34, 
4.53, 6.38, None, 7.41, 3.26, 6.82, 3.52, 4.22, 7.41, 3.03, None, None, 5.09, 6.45, 5.76, 5.05, 7.73, 5.35, 5.27, None, 6.63, 7.73, 4.45, 7.4, 5.3, 7.43, 7.73, 4.31, 7.4, 3.02, None, 5.41, 7.73, 5.39, 7.4, 7.73, 5.35, 5.08, None, 6.31, 6.45, 6.79, 6.35, 4.7, 7.43, 7.73, 5.62, 7.4, 5.19, None, 5.79, 6.25, 7.41, 5.75, None, 7.01, 6.04, 7.43, 4.15, None, 6.31, 7.07, 6.35, 5.97, 7.43, 4.13, 7.43, 7.36, 2.24, 4.58, 7.43, 6.25, 5.74, None, 6.63, 5.32, 4.16, 7.36, 3.79, 3.65, 7.43, 7.73, 4.11, 7.01, 5.09, 4.8, 7.01, 6.25, 3.5, 7.4, 5.35, 4.22, None] - self.assertEqual(doc._.token_freqs,token_freqs) - - def test_mean_token_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_token_frequency,4.649855072463768) - - def test_med_token_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_token_frequency,4.83) - - def test_max_token_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_token_frequency,6.29) - - def test_min_token_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_token_frequency,1.39) - - def test_std_token_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_token_frequency,0.84861873536042) - - def test_lemma_freqs(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - lemma_freqs = [7.73, 4.97, 4.99, 5.09, 3.69, 6.85, 5.63, 5.24, 5.61, 6.91, 5.04, 5.35, 5.27, 6.08, 7.73, 5.36, 5.02, None, 4.55, 6.31, 6.71, 6.79, 6.35, 6.29, 7.27, 5.39, 6.0, 7.73, 5.54, 5.89, 7.4, 6.25, 6.71, 5.54, 6.36, 3.95, None, 7.36, 4.98, 4.17, 6.91, 7.36, 4.59, 5.95, 4.43, 6.24, 5.09, 6.71, 2.85, 7.73, 5.89, None, 6.0, 6.25, 3.56, 7.43, 5.96, 7.27, 6.53, 3.94, 7.01, 5.49, 6.91, 6.53, 4.66, 3.57, 4.84, None, 5.72, 7.73, 2.94, None, 5.26, 6.79, 5.75, 7.01, 7.73, 4.5, 6.45, 4.05, 6.85, 7.36, 4.97, 7.01, 5.26, 5.43, 6.91, 4.0, 4.77, 
7.01, 6.79, None, 2.85, 4.96, None, 5.12, 5.34, 5.81, None, 5.66, 5.18, 6.45, 6.79, 4.94, 5.87, 4.2, 5.02, 7.01, 3.75, 4.33, 4.03, 7.73, 5.06, None, 5.69, 7.4, 6.04, 5.2, 3.77, 6.06, 6.71, 6.79, 3.07, 6.7, 7.73, 5.32, 7.4, 7.73, 3.89, 5.02, None, None, 7.73, 4.97, 4.85, 7.43, 4.77, 6.04, 4.41, 5.54, 7.43, 7.36, 4.39, 7.27, 7.73, 4.96, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, None, 7.73, 4.07, 6.79, 7.01, 6.53, 5.08, 3.81, 6.91, 5.09, 2.98, 7.73, 5.97, 7.01, 6.25, 7.43, 6.08, 3.1, 7.43, 4.42, 5.0, 2.6, None, 5.81, 6.04, 6.7, 7.73, 4.52, None, 6.47, 6.06, 4.46, 7.01, 5.69, 7.36, 5.45, None, 4.97, None, 6.54, 4.85, 5.3, None, 7.73, 3.61, 4.43, 6.27, 5.97, 7.43, 5.76, 4.11, 4.8, 7.4, 4.68, None, 5.43, 4.46, 7.41, 4.68, None, 5.09, 5.33, 4.47, 6.82, 5.24, 6.66, 6.08, 7.73, 5.4, 3.65, None, None, 5.58, None, 6.82, 3.81, 6.91, 5.09, 6.35, 6.69, 4.48, 3.03, 7.73, 4.01, 7.01, 5.05, 7.73, 5.35, 2.44, None, 7.73, 4.85, 5.27, 4.43, 7.01, 5.09, 5.01, 7.01, 4.02, None, 7.73, 5.45, None, 4.97, 7.41, 5.3, 6.52, 5.09, 4.98, 6.29, 7.01, 6.25, 7.43, 5.54, 6.36, 3.79, None, 6.82, 4.38, 6.35, 6.69, 3.03, 7.73, 5.97, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, None, 7.27, 5.41, None, 5.09, 5.63, 4.32, 7.43, 6.69, 6.12, 4.15, 6.25, 5.4, None, 6.63, 5.98, 5.1, 5.02, 6.25, 5.23, 7.01, 6.35, 6.69, 4.73, 5.69, 5.09, None, 7.01, 5.27, None, 7.73, 3.61, 7.4, 3.94, 6.71, 4.37, 7.36, 5.97, 7.01, 4.72, 4.33, 6.91, 7.36, 4.95, 4.83, None, 6.85, 5.08, 5.23, 4.84, 6.63, 3.89, 5.29, None, 4.95, 4.05, 5.4, 7.36, 4.66, 3.07, 7.43, 7.73, 4.76, None, 7.41, None, 3.32, 4.82, None, 4.04, 3.66, 6.91, 5.1, 6.71, 5.02, 4.88, None, 5.78, 7.01, 6.79, 6.69, 4.28, 6.91, 3.81, None, 5.01, 4.94, 5.49, 7.43, 2.88, 4.41, 5.57, 5.84, 6.77, 5.35, None, 4.84, 7.43, 6.04, 4.86, 5.4, 4.78, 7.73, 2.89, 4.22, 7.4, 3.33, 4.21, 7.41, 4.16, None, None, 7.27, 4.46, 7.43, 7.73, 4.97, None, 6.54, 6.46, 5.99, 6.1, 6.24, 5.09, 5.63, 7.73, 5.35, 4.22, None, 4.99, 6.24, 7.73, 4.83, 4.56, 7.41, 7.73, 3.67, 7.4, 7.73, 5.06, 6.71, 5.01, 7.01, 6.53, 
3.82, 4.85, 7.4, 5.36, None, 2.46, None, 7.36, 5.07, 5.06, 4.0, 7.01, 5.16, 5.43, None, 4.22, 4.77, 7.43, 5.25, 5.39, 3.64, 7.01, 7.36, 6.36, 4.45, 4.9, 5.04, None, 6.82, 4.68, 5.48, 3.77, 7.4, 5.24, 7.01, 6.79, 4.83, 5.36, 5.93, 7.43, 7.73, 5.16, 2.93, None, 6.85, 5.08, 3.39, 4.29, None, 4.41, 6.46, 4.38, 6.63, 7.73, 6.0, 4.28, 4.75, None, 3.9, 3.43, None, 6.47, 7.4, 7.73, 4.08, 7.4, 7.73, 4.62, 2.6, 5.31, 4.95, None, 5.05, 6.51, 5.36, 6.91, 4.72, 5.56, 2.89, 4.47, None, 7.73, 4.31, 4.91, 7.4, 5.1, 7.41, 4.73, 6.71, 4.12, 3.95, None, 2.56, 4.87, 6.63, 5.4, 5.22, 7.43, 4.78, 5.04, None, None, 6.82, 5.86, 5.27, 5.15, 7.73, 6.0, 5.44, 7.27, 6.24, 5.09, 5.49, 5.15, 5.44, 7.43, 7.73, 5.33, 7.4, 4.32, None, 6.66, 5.08, 6.14, 3.81, 6.91, 5.09, None, 4.78, 5.06, 6.46, 6.18, 6.79, 4.57, None, 4.99, 6.24, 7.73, 5.34, 3.65, 5.02, 4.52, 7.73, 5.1, 3.93, 7.4, 3.26, None, 6.82, 4.9, 6.71, 2.58, 7.73, 5.35, 5.08, 5.75, 1.39, 5.95, None, 7.41, 5.54, 6.85, 7.73, 5.09, 7.4, 4.0, None, 5.63, 5.24, 5.35, 4.31, 7.43, 4.92, 7.36, 5.89, 5.63, 7.4, 3.26, None, 5.81, 5.09, None, 4.94, 5.29, 6.79, 4.81, 6.38, None, 7.41, 3.26, 6.79, 3.52, 4.82, 7.41, 3.29, None, None, 5.09, 6.45, 5.76, 5.05, 7.73, 5.35, 5.27, None, 6.63, 7.73, 4.45, 7.4, 5.3, 7.43, 7.73, 4.31, 7.4, 3.02, None, 5.71, 7.73, 5.39, 7.4, 7.73, 5.35, 5.08, None, 6.31, 6.45, 6.79, 6.35, 4.7, 7.43, 7.73, 5.62, 7.4, 5.4, None, 5.79, 6.25, 7.41, 5.75, None, 7.01, 6.54, 7.43, 4.15, None, 6.31, 6.79, 6.35, 5.97, 7.43, 4.13, 7.43, 7.36, 2.24, 4.58, 7.43, 6.25, 5.72, None, 6.63, 5.32, 4.16, 7.36, 3.79, 3.65, 7.43, 7.73, 4.58, 7.01, 5.09, 5.15, 7.01, 6.25, 4.43, 7.4, 5.35, 4.22, None] - self.assertEqual(doc._.lemma_freqs,lemma_freqs) - - def test_mean_lemma_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_lemma_frequency,4.75159420289855) - - def test_med_lemma_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.median_lemma_frequency,4.97) - - def test_max_lemma_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_lemma_frequency,6.29) - - def test_min_lemma_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_lemma_frequency,1.39) - - def test_std_lemma_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_lemma_frequency,0.8482944982612167) - - def test_max_freqs(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - max_freqs = [7.73, 4.97, 4.99, 5.09, 3.69, 6.85, 5.63, 5.24, 5.61, 6.91, 5.04, 5.35, 5.27, 6.08, 7.73, 5.36, 5.02, 0.0, 4.55, 6.31, 6.71, 6.79, 6.35, 6.29, 7.27, 5.39, 6.0, 7.73, 5.54, 5.89, 7.4, 6.25, 6.71, 5.54, 6.36, 3.95, 0.0, 7.36, 4.98, 4.17, 6.91, 7.36, 4.59, 5.95, 4.43, 6.24, 5.09, 6.71, 2.85, 7.73, 5.89, 0.0, 6.0, 6.25, 3.56, 7.43, 5.96, 7.27, 6.53, 3.94, 7.01, 5.49, 6.91, 6.53, 4.66, 3.57, 4.84, 0.0, 5.72, 7.73, 2.94, 0.0, 5.26, 6.79, 5.75, 7.01, 7.73, 4.5, 6.45, 4.05, 6.85, 7.36, 4.97, 7.01, 5.26, 5.43, 6.91, 4.0, 4.77, 7.01, 6.79, 3.79, 2.85, 4.96, 0.0, 5.12, 5.34, 5.81, 0.0, 5.66, 5.18, 6.45, 6.79, 4.94, 5.87, 4.2, 5.02, 7.01, 3.75, 4.33, 4.03, 7.73, 5.06, 0.0, 5.69, 7.4, 6.04, 5.2, 3.77, 6.06, 6.71, 6.79, 3.07, 6.7, 7.73, 5.32, 7.4, 7.73, 3.89, 5.02, 0.0, 0.0, 7.73, 4.97, 4.85, 7.43, 4.77, 6.04, 4.41, 5.54, 7.43, 7.36, 4.39, 7.27, 7.73, 4.96, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, 0.0, 7.73, 4.07, 6.79, 7.01, 6.53, 5.08, 3.81, 6.91, 5.09, 2.98, 7.73, 5.97, 7.01, 6.25, 7.43, 6.08, 3.1, 7.43, 4.42, 5.0, 2.6, 0.0, 5.81, 6.04, 6.7, 7.73, 4.52, 0.0, 6.47, 6.06, 4.46, 7.01, 5.69, 7.36, 5.45, 0.0, 4.97, 0.0, 6.54, 4.85, 5.3, 0.0, 7.73, 3.61, 4.43, 6.27, 5.97, 7.43, 5.76, 4.11, 4.8, 7.4, 4.68, 0.0, 5.43, 4.46, 7.41, 4.68, 0.0, 5.09, 5.33, 4.47, 6.82, 5.24, 6.66, 6.08, 7.73, 5.4, 3.65, 0.0, 0.0, 5.58, 0.0, 6.82, 3.81, 6.91, 5.09, 6.35, 6.69, 4.48, 3.03, 7.73, 4.01, 7.01, 5.05, 
7.73, 5.35, 2.44, 0.0, 7.73, 4.85, 5.27, 4.43, 7.01, 5.09, 5.01, 7.01, 4.02, 0.0, 7.73, 5.45, 0.0, 4.97, 7.41, 5.3, 6.52, 5.09, 4.98, 6.29, 7.01, 6.25, 7.43, 5.54, 6.36, 3.79, 0.0, 6.82, 4.38, 6.35, 6.69, 3.03, 7.73, 5.97, 7.01, 5.35, 7.43, 6.08, 7.01, 5.16, 0.0, 7.27, 5.41, 0.0, 5.09, 5.63, 4.32, 7.43, 6.69, 6.12, 4.15, 6.25, 5.4, 0.0, 6.63, 5.98, 5.1, 5.02, 6.25, 5.23, 7.01, 6.35, 6.69, 4.73, 5.69, 5.09, 0.0, 7.01, 5.27, 0.0, 7.73, 3.61, 7.4, 3.94, 6.71, 4.37, 7.36, 5.97, 7.01, 4.72, 4.33, 6.91, 7.36, 4.95, 4.83, 0.0, 6.85, 5.08, 5.23, 4.84, 6.63, 3.89, 5.29, 0.0, 4.95, 4.05, 5.4, 7.36, 4.66, 3.07, 7.43, 7.73, 4.76, 0.0, 7.41, 0.0, 3.32, 4.82, 0.0, 4.04, 3.66, 6.91, 5.1, 6.71, 5.02, 4.88, 0.0, 5.78, 7.01, 6.79, 6.69, 4.28, 6.91, 3.81, 0.0, 5.01, 4.94, 5.49, 7.43, 2.88, 4.41, 5.57, 5.84, 6.77, 5.35, 0.0, 4.84, 7.43, 6.04, 4.86, 5.4, 4.78, 7.73, 2.89, 4.22, 7.4, 3.33, 4.21, 7.41, 4.16, 0.0, 0.0, 7.27, 4.46, 7.43, 7.73, 4.97, 0.0, 6.54, 6.46, 5.99, 6.1, 6.24, 5.09, 5.63, 7.73, 5.35, 4.22, 0.0, 4.99, 6.24, 7.73, 4.83, 4.56, 7.41, 7.73, 3.67, 7.4, 7.73, 5.06, 6.71, 5.01, 7.01, 6.53, 3.82, 4.85, 7.4, 5.36, 0.0, 2.46, 0.0, 7.36, 5.07, 5.06, 4.0, 7.01, 5.16, 5.43, 0.0, 4.22, 4.77, 7.43, 5.25, 5.39, 3.64, 7.01, 7.36, 6.36, 4.45, 4.9, 5.04, 0.0, 6.82, 4.68, 5.48, 3.77, 7.4, 5.24, 7.01, 6.79, 4.83, 5.36, 5.93, 7.43, 7.73, 5.16, 2.93, 0.0, 6.85, 5.08, 3.39, 4.29, 0.0, 4.41, 6.46, 4.38, 6.63, 7.73, 6.0, 4.28, 4.75, 0.0, 3.9, 3.43, 0.0, 6.47, 7.4, 7.73, 4.08, 7.4, 7.73, 4.62, 2.6, 5.31, 4.95, 0.0, 5.05, 6.51, 5.36, 6.91, 4.72, 5.56, 2.89, 4.47, 0.0, 7.73, 4.31, 4.91, 7.4, 5.1, 7.41, 4.73, 6.71, 4.12, 3.95, 0.0, 2.56, 4.87, 6.63, 5.4, 5.22, 7.43, 4.78, 5.04, 0.0, 0.0, 6.82, 5.86, 5.27, 5.15, 7.73, 6.0, 5.44, 7.27, 6.24, 5.09, 5.49, 5.15, 5.44, 7.43, 7.73, 5.33, 7.4, 4.32, 0.0, 6.66, 5.08, 6.14, 3.81, 6.91, 5.09, 0.0, 4.78, 5.06, 6.46, 6.18, 6.79, 4.57, 0.0, 4.99, 6.24, 7.73, 5.34, 3.65, 5.02, 4.52, 7.73, 5.1, 3.93, 7.4, 3.26, 0.0, 6.82, 4.9, 6.71, 2.58, 7.73, 5.35, 5.08, 5.75, 
1.39, 5.95, 0.0, 7.41, 5.54, 6.85, 7.73, 5.09, 7.4, 4.0, 0.0, 5.63, 5.24, 5.35, 4.31, 7.43, 4.92, 7.36, 5.89, 5.63, 7.4, 3.26, 0.0, 5.81, 5.09, 0.0, 4.94, 5.29, 6.79, 4.81, 6.38, 0.0, 7.41, 3.26, 6.79, 3.52, 4.82, 7.41, 3.29, 0.0, 0.0, 5.09, 6.45, 5.76, 5.05, 7.73, 5.35, 5.27, 0.0, 6.63, 7.73, 4.45, 7.4, 5.3, 7.43, 7.73, 4.31, 7.4, 3.02, 0.0, 5.71, 7.73, 5.39, 7.4, 7.73, 5.35, 5.08, 0.0, 6.31, 6.45, 6.79, 6.35, 4.7, 7.43, 7.73, 5.62, 7.4, 5.4, 0.0, 5.79, 6.25, 7.41, 5.75, 0.0, 7.01, 6.54, 7.43, 4.15, 0.0, 6.31, 6.79, 6.35, 5.97, 7.43, 4.13, 7.43, 7.36, 2.24, 4.58, 7.43, 6.25, 5.72, 0.0, 6.63, 5.32, 4.16, 7.36, 3.79, 3.65, 7.43, 7.73, 4.58, 7.01, 5.09, 5.15, 7.01, 6.25, 4.43, 7.4, 5.35, 4.22, 0.0] - self.assertEqual(doc._.max_freqs,max_freqs) - - def test_mean_max_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_max_frequency,4.75159420289855) - - def test_med_max_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_max_frequency,4.97) - - def test_max_max_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_max_frequency,6.29) - - def test_min_max_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_max_frequency,1.39) - - def test_std_max_frequency(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_max_frequency,0.8482944982612167) - - def test_concretes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - concretes = [None, 3.6101290323, 4.5504999999999995, 2.727, 2.1228816162, None, 1, 3.7213333333, 4.42225, None, 3.4594, 2.67236875, 4.6227030272, None, None, 3.4244923077, 4.4815135135, None, 2.13169, None, None, None, None, 2.485, None, 3.758, None, None, 4.2983770334999996, 2.719, None, 4.0375862069, None, 3.7058418914, 1.0, 2.9844662465000003, None, None, 3.7257487628, 4.273, None, None, 3.3423284553, 
4.6765, 3.912, None, 2.727, None, 3.715627451, None, 4.956, None, 3.324, 4.0375862069, 4.3136091861, None, 4.1723939394, None, None, 6.2751666667, None, 3.4237857726, None, None, 3.3450666667, 5.166, 6.371, None, None, None, 4.75, None, 2.653, None, 2.5534, None, None, 6.0109, None, 3.49005, None, None, 6.331, None, 3.715627451, 4.7225, None, 6.485, 5.8604285714, None, None, None, 5.219, 1, None, None, 2.824, 4.571, None, 5.0, 5.7865, None, None, 2.6895, None, 1, 3.781, None, 3.543, 6.029, 3.90935, None, 6.421, None, None, None, None, 2.9277179487, 4.196697479, None, None, None, 2.6968, None, None, 4.7328896984, None, None, 3.2888, 4.4815135135, None, None, None, 3.6101290323, 3.6565000000000003, None, 3.758, None, 3.012, 3.4627777778, None, None, 4.4855972222, None, None, 3.30775, None, 3.394, None, 2.74, None, None, None, None, 3.6101290323, None, None, None, 3.923, 3.059, None, 2.727, 3.936, None, 2.8826666667, None, 4.0375862069, None, 2.74, 2.1318350311, None, 3.118, 4.0028023392, 2.85, None, 3.314, 1, None, None, 4.059, None, None, None, 4.086, None, None, None, 6.2751666667, None, 6.331, None, None, 4.0784, 6.5945, None, None, 2.90060625, 5.5668983051000005, None, 1, None, 4.938, 3.1170739726, 3.0, None, 6.09975, None, 4.7225, 4.514, None, 4.9961666667, None, 2.727, 2.9116, 5.333, None, 3.7213333333, None, 3.90935, None, 3.6731428571, 3.4237009585, None, None, 3.69, None, None, 3.059, None, 2.727, None, None, 1.7887015385, 2.8723333333000003, None, 3.9589, None, 3.0855, None, 2.67236875, 6.343, None, None, 3.7830876481, 4.2752291667, 3.912, None, 2.727, 2.971, None, 3.114, None, None, 6.2751666667, None, 6.331, None, 6.5945, None, 3.7665769231, 2.68816, 2.485, None, 4.0375862069, None, 4.2983770334999996, 1.0, 2.7195516168, None, None, 4.086, None, None, 2.8723333333000003, None, 2.8826666667, None, 3.394, None, 2.74, None, None, None, None, 3.3318333333, None, 2.727, 3.118, 4.059, None, None, 1, 4.5504999999999995, 3.742, 3.6731428571, None, None, None, 
None, 3.8214958678, 3.742, 3.147, None, None, None, 3.5315, None, 2.727, None, None, 4.2752291667, None, None, 2.471, None, 6.2751666667, None, 3.2952, None, 2.8826666667, None, 5.2524, 2.943, None, None, 3.818, 4.798, None, None, 3.657, 5.2652, 2.9153333333, None, 3.378, 5.75, None, 3.818, 4.412, 3.378, None, 3.344, 2.6968, None, None, 6.072, None, None, None, 5.241, 3.743, None, 2.1816, 3.6151677852, None, 5.0698, None, 3.8214958678, 4.909, None, 5.0556666667, None, None, None, 2.8893595993, None, 4.235, None, 2.971, 3.613, 5.324, None, 3.9699999999999998, 3.03, 4.7725879397, 3.0203733333, None, 5.5494545455, None, 3.121, None, None, 2.7252553846, 3.6731428571, 3.1517857143, None, 1, 3.7556666667, None, 2.669742723, 5.4043333333, None, 5.5165, None, None, None, 3.313, None, None, 3.6101290323, None, None, None, 1, 3.7104, None, 2.727, 3.118, None, 2.67236875, 3.7556666667, None, 3.096037037, None, None, 5.063, 3.333, None, None, 4.1698333333, None, None, 5.418, None, 2.971, None, None, 1, 4.8692888889, None, 3.5119111111, None, None, None, None, 2.839, 5.418, 4.0, None, 3.588, 4.7225, None, 2.971, 4.986, None, 4.912, 4.1943333333, 4.5116521739, None, None, 1.0, 1, 5.8991428571, 4.3125, None, None, 4.9231666667, 3.057, 5.2637428928, None, 3.7213333333, None, None, 3.1614336165, 3.8631272727, None, None, None, 3.588, 4.9658318739, None, None, 3.923, 3.665856, 4.462, None, 3.947, None, 4.177, None, None, 2.5534, 2.6578920635000003, 5.985, None, None, None, None, None, None, None, 6.0, None, None, 5.2090425532, 4.4815135135, None, 3.02425, None, 4.089, None, 3.5119111111, None, 4.061, 3.442, 4.3243979592, 3.0, None, None, 3.0724059406, 4.857, None, 3.8985000000000003, None, 5.888, None, 3.677, 3.2069333333, None, None, 4.7305, None, 5.3066707678, 5.311314978, None, None, 4.7443187428, None, None, None, 3.4244923077, 4.2752291667, 4.0125, None, 3.324, 2.455, None, None, 2.727, 2.677, 4.0125, 2.455, None, None, 3.091, None, 4.059, None, None, 3.657, None, 3.059, None, 
2.727, None, 2.23728, 3.02425, None, 1, None, 2.6895, None, 3.096037037, None, None, 3.4244923077, 3.2888, 4.4815135135, 3.9726654991, None, 2.8921342281999998, 5.143, None, 5.529, None, None, 5.2455789474, None, 4.3083333333, None, 2.67236875, 4.412, None, 3.4553036269, 4.4815135135, None, None, 1, None, None, 2.727, None, 5.688, None, 1, 3.7213333333, 3.394, 2.448, None, 1.9140000000000001, None, 4.956, 1, None, 5.529, None, 3.433, 2.727, None, 4.8551666667, 4.533, None, 3.715627451, None, None, None, 5.529, None, 2.2810043213, 3.9855, None, 4.3683333333, None, None, 2.727, None, 1, 3.0855, None, 2.67236875, 4.6227030272, None, None, None, 2.9779999999999998, None, 6.123, None, None, 4.0626, None, 4.58232, None, 3.5734444443999998, None, 3.758, None, None, 2.67236875, 4.412, None, None, None, None, None, 2.824, None, None, 4.7603333333, None, 3.6731428571, None, None, 3.742, None, 3.2255333333, None, None, None, None, 4.5504999999999995, None, None, 3.1517857143, None, 2.8826666667, None, 3.5, None, None, 5.8155, 2.6575714286, None, 3.742, 4.6522857143, None, None, 2.5, 3.182, None, 1.8708, 4.309, None, None, 3.2032333333, None, 2.727, 4.0125, None, 3.742, 5.375, None, 2.67236875, 3.7556666667, None] - - self.assertEqual(doc._.concretes,concretes) - - def test_mean_concreteness(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_concreteness,3.828875890239528) - - def test_med_concreteness(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.med_concreteness,3.7213333333) - - def test_max_concreteness(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_concreteness,6.5945) - - def test_min_concreteness(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_concreteness,1.0) - - def test_std_concreteness(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.std_concreteness,1.1503349614987735) - """ - - def test_abstract_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - abstract_traits = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.abstract_traits,abstract_traits) - - def test_propn_abstract_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_abstract_traits,0.08948863636363637) - - def test_animates(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - animates = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0] - - self.assertEqual(doc._.animates,animates) - - def test_propn_animates(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_animates,0.06392045454545454) - - def test_locations(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - locations = [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, None, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, None, False, False, None, False, False, True, None, False, False, False, False, False, False, False, False, False, False, False, False, False, True, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, None, False, False, False, False, False, False, False, False, False, 
False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, None, False, False, False, False, False, False, True, None, False, None, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, None, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, None, False, None, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, None, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, 
False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, None, False, False, False, False, False, False, False, True, None, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, True, True, False, True, True, None, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, None, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, None, False, False, None, False, False, False, False, False, None, False, False, False, False, False, False, False, None, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, None, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, None, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, None] - self.assertEqual(doc._.locations,locations) - - def test_propn_locations(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_locations,0.014204545454545454) - - def test_deictics(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - deictics = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.deictics,deictics) - - def test_propn_deictics(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_deictics,0.041193181818181816) - -""" diff --git a/tests/old_tests/test_prompt_specific_features.py b/tests/old_tests/test_prompt_specific_features.py deleted file mode 100644 index dbcf5f6..0000000 --- a/tests/old_tests/test_prompt_specific_features.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -from awe_components.components.utility_functions import print_parse_tree -from awe_components.components.contentSegmentation import * -import unittest -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -text = "The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. 
Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. 
WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination." -label = 'GRE_Sample_Essay' - -prompt = "As people rely more and more on technology to solve problems, the ability of humans to think for themselves will surely deteriorate." 
-prompt_label = 'Prompt texst' - -holmes_manager.parse_and_register_document(text, label) - - -class PromptSpecificFeatureTest(unittest.TestCase): - - def test_content_segments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - content_segments = [[18, 37], [37, 52], [52, 68], [68, 95], [95, 114], [114, 131], [242, 269], [309, 328], [328, 351], [351, 392], [411, 470], [470, 526], [527, 547], [547, 561], [561, 574], [574, 621]] - self.assertEqual(doc._.content_segments, - content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_related = [[14, 3.6747192922762837, ['technology', 'emerge', 'market', 'Technology', 'nanotechnology'], [3, 46, 161, 212, 229, 247, 286, 307, 333, 334, 405, 537, 552, 589, 606, 623, 640, 695]], [51, 2.7333009865760953, ['thinking', 'assumption', 'argue', 'hypothetical', 'fact', 'consider', 'layman', 'actually', 'imagine'], [7, 154, 183, 196, 216, 284, 411, 459, 468, 538, 561, 594, 598]], [33, 2.6008002462296087, ['human', 'earth', 'humanity'], [11, 112, 147, 239, 277, 288, 408, 545, 579, 595, 628, 647, 701]]] - self.assertEqual(doc._.prompt_related, - prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_language = ['technology', 'think', 'thinking', 'human', 'sure', 'surely', 'chance', 'chances', 'conceive', 'inconceivable', 'look', 'looking', 'argue', 'however', 'necessary', 'necessarily', 'create', 'creativity', 'efficient', 'efficiency', 'fact', 'only', 'imagine', 'imagination', 'imaginations', 'contrast', 'consider', 'hope', 'yet', 'embrace'] - self.assertEqual(doc._.prompt_language, - prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - core_sentences = [[0, 18], [131, 153], [153, 175], [212, 223], [283, 309], [393, 411], [671, 704]] - self.assertEqual(doc._.core_sentences, - core_sentences) - - def test_extended_core_sentences(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - extended_core_sentences = [[175, 212], [223, 242], [269, 283], [622, 642], [642, 671]] - self.assertEqual(doc._.extended_core_sentences, - extended_core_sentences) diff --git a/tests/old_tests/test_server_api.py b/tests/old_tests/test_server_api.py deleted file mode 100644 index c251e19..0000000 --- a/tests/old_tests/test_server_api.py +++ /dev/null @@ -1,1178 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import pickle -import base64 -import math -import time -import os -import sys -import pandas as pd -import pandas.testing as pd_testing -import random -import threading -import unittest -from multiprocessing import Process, Queue - -import awe_languagetool.languagetoolServer -import awe_spellcorrect.spellcorrectServer -import awe_workbench.web.parserServer -from awe_workbench.web.websocketClient import websocketClient -from awe_languagetool.languagetoolClient import languagetoolClient - - -def startServers(): - queue = Queue() - p1 = Process(target=awe_languagetool.languagetoolServer.runServer, args=()) - p1.start() - - p2 = Process(target=awe_spellcorrect.spellcorrectServer.spellcorrectServer, args=()) - p2.start() - - p3 = Process(target=awe_workbench.web.parserServer.parserServer, args=()) - p3.start() - time.sleep(60) - return p1, p2, p3 - - -def initialize(): - """ - Initialize our CorpusSpellcheck and parser objects (for spell- - correction and parsing with spacy + coreferee and other extensions - using a modified version of the holmes extractor library. While - doing so, we initialize a series of lexical databases that support - some of the metrics we want to capture. 
- """ - # Initialize the spellchecker - cs = websocketClient() - lt = languagetoolClient() - - # Initialize the parser - parser = websocketClient() - parser.set_uri("ws://localhost:8766") - - # return spellchecker and parser objects - return cs, parser, lt - -# GRE Samples from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -# Aesop's fable is a public domain document available -# at http://read.gov/aesop/007.html - - -labels = ['GRE_Sample_Essay_1', 'GRE_Sample_Essay_2', 'Aesop'] -texts = ["The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. 
The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands fropython force script to exitm emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. 
This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", "In recent centuries, humans have developed the technology very rapidly, and you may accept some merit of it, and you may see a distortion in society occured by it. To be lazy for human in some meaning is one of the fashion issues in thesedays. There are many symptoms and resons of it. However, I can not agree with the statement that the technology make humans to be reluctant to thinkng thoroughly.\n\nOf course, you can see the phenomena of human laziness along with developed technology in some place. However, they would happen in specific condition, not general. What makes human to be laze of thinking is not merely technology, but the the tendency of human that they treat them as a magic stick and a black box. Not understanding the aims and theory of them couses the disapproval problems.\n\nThe most important thing to use the thechnology, regardless the new or old, is to comprehend the fundamental idea of them, and to adapt suit tech to tasks in need. Even if you recognize a method as a all-mighty and it is extremely over-spec to your needs, you can not see the result you want. In this procedure, humans have to consider as long as possible to acquire adequate functions. 
Therefore, humans can not escape from using their brain.\n\nIn addition, the technology as it is do not vain automatically, the is created by humans. Thus, the more developed tech and the more you want a convenient life, the more you think and emmit your creativity to breakthrough some banal method sarcastically.\n\nConsequently, if you are not passive to the new tech, but offensive to it, you would not lose your ability to think deeply. Furthermore, you may improve the ability by adopting it.", "A lion lay asleep in the forest, his great head resting on his paws. A timid little mouse came upon him unexpectedly, and in her fright and haste to get away, ran across the lion's nose. Roused from his nap, the lion laid his huge paw angrily on the tiny creature to kill her.\n\n\"Spare me!\" begged the poor mouse. \"Please let me go and some day I will surely repay you.\"\n\nThe lion was much amused to think that a mouse could ever help him. But he was generous and finally let the mouse go.\n\nSome days later, while stalking his prey in the forest, the lion was caught in the toils of a hunter's net. Unable to free himself, he filled the forest with his angry roaring. The mouse knew the voice and quickly found the lion struggling in the net. Running to one of the great ropes that bound him, she gnawed it until it parted, and soon the lion was free.\n\n\"You laughed when I said I would repay you,\" said the Mouse. 
\"Now you see that even a Mouse can help a Lion.\""] - - -class ServerAPITest(unittest.TestCase): - - p1 = None - p2 = None - p3 = None - cs = None - parser = None - lt = None - - @classmethod - def setUpClass(self): - self.p1, self.p2, self.p3 = startServers() - self.cs, self.parser, self.lt = initialize() - - @classmethod - def tearDownClass(self): - self.p1.terminate() - self.p2.terminate() - self.p3.terminate() - self.p1 = None - self.p2 = None - self.p3 = None - self.cs = None - self.parser = None - self.lt = None - - def testLTMatch(self): - record = None - matches = self.lt.processText(record, texts[1]) - # with open("pickles/ltmatches.pkl", "wb") as fp: - # pickle.dump(matches, fp) - # fp.close() - with open("pickles/ltmatches.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(matches, comparison) - - def testLTSummary(self): - df1 = self.lt.summarizeMultipleTexts(labels, texts) - df1.set_index('ID', inplace=True) - print(df1) - # with open("pickles/languagetool_summary.pkl", "wb") as fp: - # pickle.dump(df1, fp) - # fp.close() - df2 = pd.read_pickle('pickles/languagetool_summary.pkl') - pd_testing.assert_frame_equal(df1, df2) - - def testSpellCorrect(self): - corrected = self.cs.send(texts) - # with open("pickles/spellcorrected.pkl", "wb") as fp: - # pickle.dump(corrected, fp) - # fp.close() - with open('pickles/spellcorrected.pkl', 'rb') as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(corrected, comparison) - - def test_parseset(self): - ok = self.parser.send(['PARSESET', [labels, texts]]) - print('parsed', ok) - self.assertEqual(ok, True) - labels2 = self.parser.send(['LABELS']) - self.assertEqual(labels.sort(), labels2.sort()) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def test_parseone(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - print('parsed', ok) - self.assertEqual(ok, True) - ok = self.parser.send(['REMOVE', labels[0]]) - self.assertEqual(ok, 
True) - - def testDocTokens(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - tokens = self.parser.send(['DOCTOKENS', labels[0]]) - #with open("pickles/doctokens.pkl", "wb") as fp: - # pickle.dump(tokens, fp) - # fp.close() - with open("pickles/doctokens.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(tokens, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDocHeads(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['DOCHEADS', labels[0]]) - # with open("pickles/docheads.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/docheads.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDocDependencies(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['DOCDEPENDENCIES', labels[0]]) - # with open("pickles/docdependencies.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/docdependencies.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDocEntities(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['DOCENTITIES', labels[0]]) - # with open("pickles/docentities.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/docentities.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSearchPhrase(self): - ok = self.parser.send(['PARSEONE', - 'test', - 'A dog barked at a cat']) - 
self.assertEqual(ok, True) - self.parser.send(['NEWSEARCHPHRASE', 'dogs bark', 'db']) - matches = self.parser.send(['MATCH_DOCUMENTS']) - # with open("pickles/matches_docs.pkl", "wb") as fp: - # pickle.dump(matches, fp) - # fp.close() - with open("pickles/matches_docs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - ok = self.parser.send(['REMOVELABELEDSEARCH', 'db']) - self.assertEqual(ok, True) - ok = self.parser.send(['CLEARSEARCHES']) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSearchPhrase2(self): - ok = self.parser.send(['PARSESET', [labels, texts]]) - self.assertEqual(ok, True) - ok = self.parser.send(['NEWSEARCHPHRASE', - 'A mouse helps a lion', 'ml']) - ok = self.parser.send(['NEWSEARCHPHRASE', - 'Technology encourages thinking', 'tt']) - ok = self.parser.send(['NEWSEARCHPHRASE', - 'No man saw me', 'nm']) - ok = self.parser.send(['REMOVELABELEDSEARCH', 'nm']) - self.assertEqual(ok, True) - info = self.parser.send(['SHOWSEARCHLABELS']) - # with open("pickles/searchlabels.pkl", "wb") as fp: - # pickle.dump(info, fp) - # fp.close() - with open("pickles/searchlabels.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - print(info, '\n', comparison) - self.assertEqual(info, comparison) - matches = self.parser.send(['MATCH_DOCUMENTS']) - # with open("pickles/matches_docs.pkl", "wb") as fp: - # pickle.dump(matches, fp) - # fp.close() - with open("pickles/matches_docs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(matches, comparison) - ok = self.parser.send(['CLEARSEARCHES']) - self.assertEqual(ok, True) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testTopicMatches(self): - ok = self.parser.send(['PARSESET', [labels, texts]]) - self.assertEqual(ok, True) - matches = self.parser.send(['TOPIC_MATCHES', - 'A mouse helps a lion.']) - # with open("pickles/topicmatches.pkl", "wb") as fp: - # pickle.dump(matches, fp) - # fp.close() - with 
open("pickles/topicmatches.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(comparison, matches) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testLemmas(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['LEMMAS', labels[0]]) - # with open("pickles/lemmas.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/lemmas.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testWordTypes(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['WORDTYPES', labels[0]]) - # with open("pickles/wordtypes.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/wordtypes.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testRoots(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ROOTS', labels[0]]) - # with open("pickles/roots.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/roots.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSyllables(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['SYLLABLES', labels[0]]) - # with open("pickles/syllables.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/syllables.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - 
self.assertEqual(ok, True) - - def testWordLength(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['WORDLENGTH', labels[0]]) - # with open("pickles/wordlength.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/wordlength.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testLatinates(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['LATINATES', labels[0]]) - # with open("pickles/latinates.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/latinates.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testAcademics(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ACADEMICS', labels[0]]) - # with open("pickles/academics.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/academics.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testFamilySizes(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['FAMILYSIZES', labels[0]]) - # with open("pickles/familysizes.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/familysizes.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSenseNums(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - 
self.assertEqual(ok, True) - data = self.parser.send(['SENSENUMS', labels[0]]) - # with open("pickles/sensenums.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/sensenums.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testLogSenseNums(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['LOGSENSENUMS', labels[0]]) - # with open("pickles/logsensenums.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/logsensenums.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testMorpholex(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['MORPHOLOGY', labels[0]]) - # with open("pickles/morpholex.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/morpholex.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testMorphNums(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['MORPHNUMS', labels[0]]) - # with open("pickles/morphnums.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/morphnums.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testHALRootFreqs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['HALROOTFREQS', labels[0]]) - # with open("pickles/halrootfreqs.pkl", "wb") 
as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/halrootfreqs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testHALLogRootFreqs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['HALLOGROOTFREQS', labels[0]]) - # with open("pickles/hallogrootfreqs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/hallogrootfreqs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testRootFamSizes(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ROOTFAMSIZES', labels[0]]) - # with open("pickles/rootfamsizes.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/rootfamsizes.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testRootPFMFs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ROOTPFMFS', labels[0]]) - # with open("pickles/rootpfmfs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/rootpfmfs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testLemmaFreqs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['LEMMAFREQS', labels[0]]) - # with open("pickles/lemmafreqs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/lemmafreqs.pkl", "rb") as fp: - 
comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testRootFreqs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ROOTFREQS', labels[0]]) - # with open("pickles/rootfreqs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/rootfreqs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testMaxFreqs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['MAXFREQS', labels[0]]) - # with open("pickles/maxfreqs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/maxfreqs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testConcretes(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['CONCRETES', labels[0]]) - # with open("pickles/concretes.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/concretes.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testAnimates(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ANIMATES', labels[0]]) - # with open("pickles/animates.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/animates.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - 
def testAbstractTraits(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ABSTRACTTRAITS', labels[0]]) - # with open("pickles/abstracttraits.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/abstracttraits.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDeictics(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['DEICTICS', labels[0]]) - # with open("pickles/deictics.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/deictics.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testParagraphs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['PARAGRAPHS', labels[0]]) - # with open("pickles/paragraphs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/paragraphs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testParagraphLens(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['PARAGRAPHLENS', labels[0]]) - # with open("pickles/paragraphlens.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/paragraphlens.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testTransitionProfile(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - 
self.assertEqual(ok, True) - data = self.parser.send(['TRANSITIONPROFILE', labels[0]]) - # with open("pickles/transitionprofile.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/transitionprofile.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testTransitionDistances(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['TRANSITIONDISTANCES', labels[0]]) - # with open("pickles/transitiondistances.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/transitiondistances.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSentenceCohesions(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['SENTENCECOHESIONS', labels[0]]) - # with open("pickles/sentencecohesions.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/sentencecohesions.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSliderCohesions(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['SLIDERCOHESIONS', labels[0]]) - # with open("pickles/slidercohesions.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/slidercohesions.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testRhemeDepths(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, 
True) - data = self.parser.send(['RHEMEDEPTHS', labels[0]]) - # with open("pickles/rhemedepths.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/rhemedepths.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testThemeDepths(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['THEMEDEPTHS', labels[0]]) - # with open("pickles/themedepths.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/themedepths.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testWeightedDepths(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['WEIGHTEDDEPTHS', labels[0]]) - # with open("pickles/weighteddepths.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/weighteddepths.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testWeightedBreadths(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['WEIGHTEDBREADTHS', labels[0]]) - # with open("pickles/weightedbreadths.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/weightedbreadths.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSyntacticProfile(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['SYNTACTICPROFILE', labels[0]]) - # with 
open("pickles/syntacticprofile.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/syntacticprofile.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testNormedSyntacticProfile(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['NORMEDSYNTACTICPROFILE', labels[0]]) - # with open("pickles/normedsyntacticprofile.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/normedsyntacticprofile.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDirectSpeechSpans(self): - ok = self.parser.send(['PARSEONE', labels[2], texts[2]]) - self.assertEqual(ok, True) - data = self.parser.send(['DIRECTSPEECHSPANS', labels[2]]) - # with open("pickles/directspeechspans.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/directspeechspans.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - print('data',data) - print('comparison', comparison) - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testInteractiveLanguage(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['INTERACTIVELANGUAGE', labels[0]]) - # with open("pickles/interactivelanguage.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/interactivelanguage.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSubjectivityRatingsLanguage(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, 
True) - data = self.parser.send(['SUBJECTIVITYRATINGS', labels[0]]) - # with open("pickles/subjectivityratings.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/subjectivityratings.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testPolarityRatings(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['POLARITYRATINGS', labels[0]]) - # with open("pickles/polarityratings.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/polarityratings.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testAssessments(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['ASSESSMENTS', labels[0]]) - # with open("pickles/assessments.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/assessments.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSentimentRatings(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['SENTIMENTRATINGS', labels[0]]) - # with open("pickles/sentimentratings.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/sentimentratings.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testPerspectiveSpans(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = 
self.parser.send(['PERSPECTIVESPANS', labels[0]]) - # with open("pickles/perspectivespans.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/perspectivespans.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testStanceMarkers(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['STANCEMARKERS', labels[0]]) - # with open("pickles/stancemarkers.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/stancemarkers.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testEmotionalStates(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['EMOTIONALSTATES', labels[0]]) - # with open("pickles/emotionalstates.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/emotionalstates.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testCharacterTraits(self): - ok = self.parser.send(['PARSEONE', labels[2], texts[2]]) - self.assertEqual(ok, True) - data = self.parser.send(['CHARACTERTRAITS', labels[2]]) - # with open("pickles/charactertraits.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/charactertraits.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testPropositionalAttitudes(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['PROPOSITIONALATTITUDES', 
labels[0]]) - # with open("pickles/propositionalattitudes.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/propositionalattitudes.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testGoverningSubjects(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['GOVERNINGSUBJECTS', labels[0]]) - # with open("pickles/governingsubjects.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/governingsubjects.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDevWords(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['DEVWORDS', labels[0]]) - # with open("pickles/devwords.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/devwords.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testClusterInfo(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['CLUSTERINFO', labels[0]]) - # with open("pickles/clusterinfo.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/clusterinfo.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDocSummaryLabels(self): - data = self.parser.send(['DOCSUMMARYLABELS']) - # with open("pickles/docsummarylabels.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/docsummarylabels.pkl", "rb") as fp: - 
comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - - def testDocSummaryFeats(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['DOCSUMMARYFEATS', labels[0]]) - #with open("pickles/docsummaryfeats2.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/docsummaryfeats.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testTokVecs(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - data = self.parser.send(['TOKVECS', labels[0]]) - # with open("pickles/tokvecs.pkl", "wb") as fp: - # pickle.dump(data, fp) - # fp.close() - with open("pickles/tokvecs.pkl", "rb") as fp: - comparison = pickle.load(fp) - fp.close() - self.assertEqual(data, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - -# Serialization seems not to return exactly the same -# binary result every run. Not sure why. Comment out -# for now. 
-# def testSerialized(self): -# ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) -# self.assertEqual(ok, True) -# data = self.parser.sendraw(['SERIALIZED', labels[0]]) -# fp = open("pickles/serialized.dat", "wb") -# fp.write(data) -# fp.close() -# fp = open("pickles/serialized.dat", "rb") -# comparison = fp.read() -# fp.close() -# self.assertEqual(data, comparison) -# ok = self.parser.send(['CLEARPARSED']) -# self.assertEqual(ok, True) - - def testIndirectSpeech(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['IN_DIRECT_SPEECH', labels[0]]) - # fp = open("pickles/in_direct_speech.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/in_direct_speech.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testTenseChanges(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['TENSECHANGES', labels[0]]) - # fp = open("pickles/tenseChanges.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/tenseChanges.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testSocialAwareness(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['SOCIAL_AWARENESS', labels[0]]) - # fp = open("pickles/socialAwareness.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/socialAwareness.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testConcreteDetails(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - 
self.assertEqual(ok, True) - locations = self.parser.send(['CONCRETEDETAILS', labels[0]]) - # fp = open("pickles/concreteDetails.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/concreteDetails.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testLocations(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['LOCATIONS', labels[0]]) - # fp = open("pickles/locations.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/locations.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testPromptLanguage(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - promptlanguage = self.parser.send(['PROMPTLANGUAGE', labels[0]]) - # fp = open("pickles/promptlanguage.dat", "wb") - # pickle.dump(promptlanguage, fp) - # fp.close() - fp = open("pickles/promptlanguage.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(promptlanguage, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testContentSegments(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - contentsegments = self.parser.send(['CONTENTSEGMENTS', labels[0]]) - # fp = open("pickles/contentsegments.dat", "wb") - # pickle.dump(contentsegments, fp) - # fp.close() - fp = open("pickles/contentsegments.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(contentsegments, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testPromptRelated(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = 
self.parser.send(['PROMPTRELATED', labels[0]]) - # fp = open("pickles/promptrelated.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/promptrelated.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testCoreSentences(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['CORESENTENCES', labels[0]]) - # fp = open("pickles/coresentences.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/coresentences.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testClaimTexts(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['CLAIMTEXTS', labels[0]]) - # fp = open("pickles/claimtexts.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/claimtexts.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - - def testDiscussionTexts(self): - ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) - self.assertEqual(ok, True) - locations = self.parser.send(['DISCUSSIONTEXTS', labels[0]]) - # fp = open("pickles/discussiontexts.dat", "wb") - # pickle.dump(locations, fp) - # fp.close() - fp = open("pickles/discussiontexts.dat", "rb") - comparison = pickle.load(fp) - fp.close() - self.assertEqual(locations, comparison) - ok = self.parser.send(['CLEARPARSED']) - self.assertEqual(ok, True) - -# For some reason this test returns an error due to the AWE_Info extension -# not being registered. 
This probably is due to parserServer somehow not -# getting all the global information about extensions handled properly, -# or at least that's what internet search suggests, but the code is exactly -# the same here as in all the other tests and I can't find out what's triggering -# the error. The code works when called normally rather than from pytest ... -# def testTokFreqs(self): -# ok = self.parser.send(['PARSEONE', labels[0], texts[0]]) -# self.assertEqual(ok, True) -# data = self.parser.send(['TOKFREQS', labels[0]]) -# print(data, file=sys.stderr) -# # with open("pickles/tokfreqs.pkl", "wb") as fp: -# # pickle.dump(data, fp) -# # fp.close() -# with open("pickles/tokfreqs.pkl", "rb") as fp: -# comparison = pickle.load(fp) -# fp.close() -# self.assertEqual(data, comparison) -# ok = self.parser.send(['CLEARPARSED']) -# self.assertEqual(ok, True) diff --git a/tests/old_tests/test_syntax_discourse_features.py b/tests/old_tests/test_syntax_discourse_features.py deleted file mode 100644 index 159c156..0000000 --- a/tests/old_tests/test_syntax_discourse_features.py +++ /dev/null @@ -1,434 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -from awe_components.components.utility_functions import print_parse_tree -import unittest -import json - -import tensorflow as tf -from awe_workbench.pipeline import pipeline_def - -gpus = tf.config.experimental.list_physical_devices('GPU') -if gpus: - try: - # Currently, memory growth needs to be the same across GPUs - for gpu in gpus: - tf.config.experimental.set_memory_growth(gpu, True) - logical_gpus = tf.config.experimental.list_logical_devices('GPU') - print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") - except RuntimeError as e: - # Memory growth must be set before GPUs have been initialized - print(e) - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, 
extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="The statement linking technology negatively with free thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. 
In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. 
Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", label='GRE_Sample_Essay') - - -class SyntaxDiscourseFeatureTest(unittest.TestCase): - - def test_paragraph_breaks(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - paragraph_breaks = [131, 223, 393, 527, 622, 703] - self.assertEqual(doc._.paragraph_breaks, - paragraph_breaks) - - def test_paragraph_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - paragraph_lengths = [131, 92, 170, 134, 95, 81] - self.assertEqual(doc._.paragraph_lengths, - paragraph_lengths) - - def test_mean_paragraph_length(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_paragraph_length, - 117.16666666666667) - - def test_median_paragraph_length(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_paragraph_length, 113.0) - - def test_max_paragraph_length(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_paragraph_length, 170) - - def test_min_paragraph_length(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_paragraph_length, 81) - - def test_stdev_paragraph_length(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_paragraph_length, - 33.760430486986785) - - def test_transition_word_profile(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - profile = [25, {'temporal': 6, 'PARAGRAPH': 5, 'contrastive': 5, 
'illustrative': 6, 'emphatic': 2, 'comparative': 1}, {'over the past century': 1, 'no time in history': 1, 'During the workday': 1, 'Upon leaving home': 1, 'at the turn of the 19th century': 1, 'NEWLINE': 5, 'however': 1, 'the prior examples': 1, 'in fact': 1, 'not only': 1, 'but': 2, 'for example': 1, 'likewise': 1, 'such as': 1, 'in contrast': 1, 'consider how': 2, 'this last example': 1, 'since prehistorical days': 1, 'and yet': 1}, [['over the past century', 0, 13, 16, 'temporal'], ['no time in history', 18, 22, 25, 'temporal'], ['During the workday', 68, 68, 70, 'temporal'], ['Upon leaving home', 95, 95, 97, 'temporal'], ['at the turn of the 19th century', 114, 123, 129, 'temporal'], ['NEWLINE', 131, 131, 131, 'PARAGRAPH'], ['NEWLINE', 223, 223, 223, 'PARAGRAPH'], ['however', 223, 224, 224, 'contrastive'], ['the prior examples', 242, 242, 245, 'illustrative'], ['in fact', 283, 283, 284, 'emphatic'], ['not only', 283, 290, 291, 'emphatic'], ['but', 283, 296, 296, 'contrastive'], ['for example', 309, 309, 310, 'illustrative'], ['likewise', 351, 351, 351, 'comparative'], ['such as', 351, 374, 375, 'illustrative'], ['NEWLINE', 393, 393, 393, 'PARAGRAPH'], ['in contrast', 393, 394, 395, 'contrastive'], ['consider how', 411, 411, 412, 'illustrative'], ['NEWLINE', 527, 527, 527, 'PARAGRAPH'], ['this last example', 527, 528, 531, 'illustrative'], ['consider how', 561, 561, 562, 'illustrative'], ['since prehistorical days', 574, 581, 583, 'temporal'], ['and yet', 574, 585, 586, 'contrastive'], ['NEWLINE', 622, 622, 622, 'PARAGRAPH'], ['but', 671, 685, 685, 'contrastive']]] - self.assertEqual(doc._.transition_word_profile, - profile) - - def test_total_transition_words(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.total_transition_words, 25) - - def test_transition_category_count(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.transition_category_count, 6) - - def 
test_transition_word_type_count(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.transition_word_type_count, 19) - - def test_transition_words(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - transition_words = [['over the past century', 0, 13, 16, 'temporal'], ['no time in history', 18, 22, 25, 'temporal'], ['During the workday', 68, 68, 70, 'temporal'], ['Upon leaving home', 95, 95, 97, 'temporal'], ['at the turn of the 19th century', 114, 123, 129, 'temporal'], ['NEWLINE', 131, 131, 131, 'PARAGRAPH'], ['NEWLINE', 223, 223, 223, 'PARAGRAPH'], ['however', 223, 224, 224, 'contrastive'], ['the prior examples', 242, 242, 245, 'illustrative'], ['in fact', 283, 283, 284, 'emphatic'], ['not only', 283, 290, 291, 'emphatic'], ['but', 283, 296, 296, 'contrastive'], ['for example', 309, 309, 310, 'illustrative'], ['likewise', 351, 351, 351, 'comparative'], ['such as', 351, 374, 375, 'illustrative'], ['NEWLINE', 393, 393, 393, 'PARAGRAPH'], ['in contrast', 393, 394, 395, 'contrastive'], ['consider how', 411, 411, 412, 'illustrative'], ['NEWLINE', 527, 527, 527, 'PARAGRAPH'], ['this last example', 527, 528, 531, 'illustrative'], ['consider how', 561, 561, 562, 'illustrative'], ['since prehistorical days', 574, 581, 583, 'temporal'], ['and yet', 574, 585, 586, 'contrastive'], ['NEWLINE', 622, 622, 622, 'PARAGRAPH'], ['but', 671, 685, 685, 'contrastive']] - self.assertEqual(doc._.transition_words, - transition_words) - - def test_transition_distances(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - transition_distances = [0.2754433751106262, 0.18068242073059082, 0.3455371856689453, 0.5115512013435364, 0.29295045137405396, 0.26799535751342773, 0.2427629828453064, 0.2613025903701782, 0.3560638427734375, 0.36619603633880615, 0.3768072724342346, 0.171677827835083, 0.21559733152389526, 0.42869412899017334, 0.4524803161621094, 0.4260185956954956, 0.46465003490448, 0.26872915029525757, 0.580579549074173, 
0.5725382268428802, 0.41816604137420654, 0.3364747166633606, 0.3091871738433838, 0.6479913592338562, 0.19418299198150635] - self.assertEqual(doc._.transition_distances, - transition_distances) - - def test_mean_transition_distance(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_transition_distance, - 0.3585704064369202) - - def test_median_transition_distance(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_transition_distance, - 0.3455371856689453) - - def test_max_transition_distance(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_transition_distance, - 0.6479913592338562) - - def test_min_transition_distance(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_transition_distance, - 0.171677827835083) - - def test_stdev_transition_distance(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_transition_distance, - 0.1296185349128186) - - def test_intersentence_cohesions(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - intersentence_cohesions = [0.8988593220710754, 0.8736750483512878, 0.8477486968040466, 0.8780263066291809, 0.8904399871826172, 0.8478362560272217, 0.8970850110054016, 0.9370104074478149, 0.8875330090522766, 0.8721916675567627, 0.8623306155204773, 0.8789313435554504, 0.87635737657547, 0.8838761448860168, 0.9297630190849304, 0.8696791529655457, 0.8978860974311829, 0.8615327477455139, 0.855689525604248, 0.8783911466598511, 0.900669276714325, 0.8269333839416504, 0.8536540865898132, 0.8761308789253235, 0.8271347880363464, 0.8295413255691528, 0.8300540447235107, 0.8762711882591248, 0.7860766053199768, 0.8845039010047913, 0.8930788040161133, 0.8380871415138245, 0.9311631321907043, 0.9270924925804138] - self.assertEqual(doc._.intersentence_cohesions, - intersentence_cohesions) - - def test_mean_sent_cohesion(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sent_cohesion, - 0.8736833509276895) - - def test_median_sent_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sent_cohesion, - 0.8763142824172974) - - def test_max_sent_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sent_cohesion, - 0.9370104074478149) - - def test_min_sent_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sent_cohesion, - 0.7860766053199768) - - def test_stdev_sent_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_sent_cohesion, - 0.033244800492783624) - - def test_sliding_window_cohesions(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sliding_window_cohesions = [0.7181742191314697, 0.701263427734375, 0.6790735125541687, 0.6474848985671997, 0.6590394377708435, 0.7189511656761169, 0.6720224618911743, 0.7850678563117981, 0.8007805347442627, 0.7889644503593445, 0.7991914749145508, 0.8140134215354919, 0.8126447200775146, 0.7754191160202026, 0.7475166320800781, 0.7887311577796936, 0.6866664290428162, 0.7194632887840271, 0.7194632887840271, 0.6448104977607727, 0.6458194255828857, 0.6696494221687317, 0.5748744010925293, 0.5917541980743408, 0.4936998188495636, 0.6006785035133362, 0.5567438006401062, 0.5973606705665588, 0.6129812002182007, 0.659326434135437, 0.688444972038269, 0.7055871486663818, 0.6687339544296265, 0.6260309815406799, 0.5268124938011169, 0.5692242980003357, 0.5738590955734253, 0.5684884190559387, 0.6827142238616943, 0.6544976234436035, 0.6336449980735779, 0.5551523566246033, 0.571733832359314, 0.5404956936836243, 0.3755459487438202, 0.5269684791564941, 0.5898616313934326, 0.41509121656417847, 0.44168174266815186, 0.4371638000011444, 0.3960413932800293, 0.4594825208187103, 0.4970448911190033, 0.5166383981704712, 0.5925062298774719, 
0.5895653367042542, 0.6845969557762146, 0.46040961146354675, 0.42461514472961426, 0.44434645771980286, 0.4096076488494873, 0.44259214401245117, 0.4406617283821106, 0.44507601857185364, 0.5546365976333618, 0.43519437313079834, 0.5321240425109863, 0.4746674597263336, 0.4140016734600067, 0.4774576425552368, 0.4703843891620636, 0.5924128293991089, 0.5924128293991089, 0.5004821419715881, 0.4637318253517151, 0.33062052726745605, 0.27226918935775757, 0.2537829577922821, 0.2854864001274109, 0.3904673755168915, 0.4211515188217163, 0.45139551162719727, 0.5127795934677124, 0.38112887740135193, 0.24094435572624207, 0.2918039858341217, 0.1845623105764389, 0.22997146844863892, 0.36806720495224, 0.4143584966659546, 0.3875931203365326, 0.38407766819000244, 0.4325980246067047, 0.4053116738796234, 0.3240915536880493, 0.3403795063495636, 0.3285251259803772, 0.3186679482460022, 0.38233309984207153, 0.40397265553474426, 0.4056640863418579, 0.423939049243927, 0.4444441497325897, 0.3398769795894623, 0.36920079588890076, 0.36920079588890076, 0.3883742094039917, 0.42753884196281433, 0.41213420033454895, 0.37709927558898926, 0.39389845728874207, 0.4492016136646271, 0.4492016136646271, 0.2987273335456848, 0.29447853565216064, 0.3743438422679901, 0.4030083119869232, 0.4750116765499115, 0.42504170536994934, 0.48562872409820557, 0.5353590846061707, 0.5397853255271912, 0.5397853255271912, 0.5133278369903564, 0.4792203903198242, 0.5079424977302551, 0.4732956290245056, 0.4496147334575653, 0.47529879212379456, 0.4816325306892395, 0.4889850616455078, 0.6232978701591492, 0.6232978701591492, 0.5230883359909058, 0.5245723128318787, 0.47190043330192566, 0.45224520564079285, 0.52435302734375, 0.5436772704124451, 0.7187649011611938, 0.8517671227455139, 0.804987907409668, 0.6629474759101868, 0.5680908560752869, 0.5643051862716675, 0.5234549045562744, 0.5782879590988159, 0.5782879590988159, 0.692085325717926, 0.7090957760810852, 0.6492884159088135, 0.62215256690979, 0.5129061937332153, 0.5064864754676819, 
0.5264977812767029, 0.45184406638145447, 0.5640366077423096, 0.5242792367935181, 0.5922706127166748, 0.5628295540809631, 0.5925487875938416, 0.6522911190986633, 0.5478234887123108, 0.5763028264045715, 0.618124783039093, 0.6638599634170532, 0.5731121897697449, 0.5437830686569214, 0.5437830686569214, 0.4712313711643219, 0.4569089710712433, 0.5454812049865723, 0.48367583751678467, 0.42092519998550415, 0.48569998145103455, 0.42305129766464233, 0.2977829873561859, 0.2977829873561859, 0.3928380012512207, 0.5374341607093811, 0.5031077861785889, 0.680233359336853, 0.680233359336853, 0.7139794230461121, 0.495082288980484, 0.4879363477230072, 0.4879363477230072, 0.5396188497543335, 0.5411055088043213, 0.5838121771812439, 0.5381329655647278, 0.6154993176460266, 0.6020007133483887, 0.5532087087631226, 0.5166041851043701, 0.6518845558166504, 0.6322896480560303, 0.6936717629432678, 0.7012785077095032, 0.6895168423652649, 0.7263908982276917, 0.6917502284049988, 0.702608048915863, 0.6224715709686279, 0.6139522194862366, 0.5929139256477356, 0.6004048585891724, 0.6113495230674744, 0.6583157777786255, 0.563602089881897, 0.5703687071800232, 0.624764621257782, 0.6356892585754395, 0.5418727993965149, 0.660626232624054, 0.6365984678268433, 0.6365984678268433, 0.6277673840522766, 0.6215293407440186, 0.6219359636306763, 0.5721964836120605, 0.5492886304855347, 0.5228410959243774, 0.5004310011863708, 0.5531101822853088, 0.5986908078193665, 0.5671816468238831, 0.5866214036941528, 0.5946487188339233, 0.721796989440918, 0.6296963095664978, 0.6760013103485107, 0.6499212980270386, 0.6191558241844177, 0.5661692023277283, 0.5312169790267944, 0.42728954553604126, 0.4562321603298187, 0.4211899936199188, 0.43973419070243835, 0.4918561577796936, 0.588962733745575, 0.6923847794532776, 0.7171835899353027, 0.6751047372817993, 0.7206659913063049, 0.6406289935112, 0.6438475251197815, 0.5796738266944885, 0.5699208974838257, 0.5898752808570862, 0.5684922337532043, 0.5515444278717041, 0.5951597690582275, 
0.5218393206596375, 0.55599445104599, 0.4096278250217438, 0.5285232663154602, 0.5646203756332397, 0.6959474682807922, 0.7203365564346313, 0.7185603380203247, 0.6625391840934753, 0.6423453092575073, 0.4971276819705963, 0.5273186564445496, 0.3788944184780121, 0.48065927624702454, 0.6103437542915344, 0.7828500866889954, 0.7766262292861938, 0.7812368273735046, 0.6906050443649292, 0.696194589138031, 0.6632706522941589, 0.5880956053733826, 0.6033019423484802, 0.600532054901123, 0.6870222687721252, 0.5664738416671753, 0.6295509338378906, 0.6456001400947571, 0.6606005430221558, 0.6630827784538269, 0.6997432708740234, 0.7766095399856567, 0.822784423828125, 0.7297685742378235, 0.7680246829986572, 0.7385520935058594, 0.8694490194320679, 0.8062267899513245, 0.7353216409683228, 0.7639004588127136, 0.7887110114097595, 0.7451591491699219, 0.7286704182624817, 0.7197006344795227, 0.5714356899261475, 0.6237432956695557, 0.6275636553764343, 0.6628205180168152, 0.6909197568893433, 0.6639208197593689, 0.6479867696762085, 0.6235129833221436, 0.5890401601791382, 0.6758988499641418, 0.5882089138031006, 0.5884600877761841, 0.5746558904647827, 0.6486964821815491, 0.6165353655815125, 0.7018287181854248, 0.7054082751274109, 0.6990912556648254, 0.7131054401397705, 0.7826208472251892, 0.8002692461013794, 0.8187614679336548, 0.7922825217247009, 0.7839767336845398, 0.8015739917755127, 0.8150758743286133, 0.8106814026832581, 0.7866733074188232, 0.7180758714675903, 0.5957860350608826, 0.5097404718399048, 0.518147885799408, 0.42179739475250244, 0.3904438316822052, 0.34155407547950745, 0.4236290752887726, 0.5133522152900696, 0.5123850703239441, 0.6250080466270447, 0.5969473719596863, 0.5544553399085999, 0.5544728636741638, 0.49274978041648865, 0.4804668128490448, 0.393265962600708, 0.47033119201660156, 0.5464437007904053, 0.6502533555030823, 0.650912344455719, 0.6295266151428223, 0.6125736832618713, 0.7110123634338379, 0.6587409377098083, 0.6810725927352905, 0.6525141596794128, 0.5871813893318176, 
0.5741062760353088, 0.6681912541389465, 0.6420008540153503, 0.6505707502365112, 0.5556239485740662, 0.5587475299835205, 0.5747273564338684, 0.6126003265380859, 0.6067212820053101, 0.5422795414924622, 0.4704667627811432, 0.5488695502281189, 0.536759078502655, 0.5157687664031982, 0.47764521837234497, 0.47986119985580444, 0.5271527767181396, 0.5498452186584473, 0.5290605425834656, 0.46688687801361084, 0.3602869212627411, 0.37699976563453674, 0.3809293508529663, 0.45073172450065613, 0.49928227066993713, 0.5415453314781189, 0.5122532844543457, 0.5543336272239685, 0.36085236072540283, 0.3665997087955475, 0.3116161823272705, 0.3116161823272705, 0.22969196736812592, 0.49183592200279236, 0.4778640866279602, 0.3747110962867737, 0.39294037222862244, 0.4299679398536682, 0.37511005997657776, 0.45231547951698303, 0.45231547951698303, 0.45231547951698303, 0.311810165643692, 0.6087309122085571, 0.6377063393592834, 0.5729437470436096, 0.5713992118835449, 0.5866880416870117, 0.5389125347137451, 0.5725162625312805, 0.5725162625312805, 0.6103866696357727, 0.5800052285194397, 0.6384758353233337, 0.6805055737495422, 0.6303063631057739, 0.632068932056427, 0.6773736476898193, 0.5216607451438904, 0.5345773100852966, 0.5092067718505859, 0.549133837223053, 0.5851325392723083, 0.6893542408943176, 0.6067965626716614, 0.5960580110549927, 0.6024318933486938, 0.6473482251167297, 0.5909489393234253, 0.622308611869812, 0.5430110096931458, 0.5262013673782349, 0.5152072906494141, 0.5069680213928223, 0.47498154640197754, 0.47355157136917114, 0.5156494379043579, 0.5657385587692261, 0.6107592582702637, 0.6410508751869202, 0.7491336464881897, 0.7429540157318115, 0.708656907081604, 0.7128888368606567, 0.7051429748535156, 0.7304097414016724, 0.7082116007804871, 0.7053606510162354, 0.6371103525161743, 0.6145652532577515, 0.6448284387588501, 0.5823889970779419, 0.47524186968803406, 0.4950029253959656, 0.5897514820098877, 0.733385443687439, 0.6809965372085571, 0.6470853686332703, 0.5646995306015015, 
0.5743997693061829, 0.567176878452301, 0.5640305876731873, 0.5190287232398987, 0.5860862731933594, 0.45603713393211365, 0.601533830165863, 0.5251269936561584, 0.5251269936561584, 0.4691050052642822, 0.4861484169960022, 0.3381423354148865, 0.3549068570137024, 0.33667048811912537, 0.34557345509529114, 0.3017862141132355, 0.26958876848220825, 0.26958876848220825, 0.26958876848220825, 0.22090551257133484, 0.10331247001886368, 0.09946548193693161, 0.1849459409713745, 0.30455225706100464, 0.32518064975738525, 0.28176671266555786, 0.28176671266555786, 0.25893980264663696, 0.43409574031829834, 0.4541017711162567, 0.43791645765304565, 0.43601515889167786, 0.393983393907547, 0.31059807538986206, 0.4658436179161072, 0.5241922736167908, 0.4947414696216583, 0.5775105357170105, 0.4995882511138916, 0.5370966196060181, 0.6021908521652222, 0.6589798331260681, 0.6821597814559937, 0.5967273116111755, 0.582856297492981, 0.5628841519355774, 0.5423623919487, 0.5445433855056763, 0.6205247640609741, 0.6047906279563904, 0.628349781036377, 0.5308804512023926, 0.5292799472808838, 0.49483180046081543, 0.4438880383968353, 0.44538453221321106, 0.39144372940063477, 0.32624340057373047, 0.33729884028434753, 0.36647048592567444, 0.5381368398666382, 0.5796362161636353, 0.4877815842628479, 0.4056786298751831, 0.3741012513637543, 0.3741012513637543, 0.41936007142066956, 0.3283812701702118, 0.2851704955101013, 0.43395164608955383, 0.5865570902824402, 0.5865570902824402, 0.6814536452293396, 0.8771107196807861, 0.8514654636383057, 0.8616060018539429, 0.8265013098716736, 0.7317827343940735, 0.6125732660293579, 0.6415880918502808, 0.6415880918502808, 0.6773134469985962, 0.7344356179237366, 0.7574276328086853, 0.7808356285095215, 0.7187724113464355, 0.6987088322639465, 0.7151852250099182, 0.6983718276023865, 0.6730572581291199, 0.6597504019737244, 0.606074333190918, 0.5605922937393188, 0.6138680577278137, 0.5882761478424072, 0.566649317741394, 0.5377964377403259, 0.4628887176513672, 0.5489749312400818, 
0.5501205325126648, 0.572618842124939, 0.5328660607337952, 0.48593321442604065, 0.610148012638092, 0.5011742115020752, 0.42126917839050293, 0.43704622983932495, 0.35713109374046326, 0.39734765887260437, 0.5136548280715942, 0.5295951962471008, 0.5775947570800781, 0.6341943144798279, 0.6095657348632812, 0.5941829681396484, 0.4871390759944916, 0.4948325455188751, 0.33891695737838745, 0.3814352750778198, 0.40467697381973267, 0.5137110352516174, 0.4322446584701538, 0.4563347399234772, 0.4418659508228302, 0.5515823364257812, 0.5925307869911194, 0.5925307869911194, 0.41388991475105286, 0.42125800251960754, 0.44196179509162903, 0.5017839074134827, 0.4229195713996887, 0.6369653940200806, 0.7045741677284241, 0.7806202173233032, 0.8586754202842712, 0.8089002966880798, 0.701143205165863, 0.6780319809913635, 0.560285747051239, 0.6080725193023682, 0.6440467834472656, 0.6091873645782471, 0.6041891574859619, 0.7203472852706909, 0.6859999299049377, 0.6288899779319763, 0.5956356525421143, 0.6310295462608337, 0.6070647239685059, 0.6026177406311035, 0.37063199281692505, 0.5363000631332397, 0.5828129053115845, 0.5072140693664551, 0.5353472828865051, 0.5218008160591125, 0.5945451855659485, 0.4543611705303192, 0.3569714426994324, 0.3569714426994324, 0.352898508310318, 0.5161055326461792, 0.5477725267410278, 0.547351598739624, 0.5778474807739258, 0.49038827419281006, 0.5734430551528931, 0.6217519640922546, 0.656516969203949, 0.656516969203949, 0.5748251676559448, 0.6277108192443848, 0.6926324367523193, 0.6373811364173889, 0.6373811364173889, 0.6303789615631104, 0.5546615123748779, 0.5224807858467102, 0.5113321542739868, 0.4760902225971222, 0.492556631565094, 0.463143527507782, 0.5552494525909424, 0.5552494525909424, 0.4319190979003906, 0.4683215916156769, 0.48052531480789185, 0.5751261115074158, 0.5751261115074158, 0.4873209297657013, 0.5247870087623596, 0.45412883162498474, 0.5261881351470947, 0.5261881351470947, 0.4173544943332672, 0.4647064805030823, 0.36291688680648804, 
0.4990863800048828, 0.4990863800048828, 0.44398272037506104, 0.5105710625648499, 0.34621956944465637, 0.5540810227394104, 0.5540810227394104, 0.5187355875968933, 0.5187355875968933, 0.4385828673839569, 0.39906346797943115, 0.4428623914718628, 0.4133220314979553, 0.49259528517723083, 0.3077422082424164, 0.48961880803108215, 0.4834340512752533, 0.515978991985321, 0.5658944249153137, 0.626227617263794, 0.5341516137123108, 0.6856047511100769, 0.6929025053977966, 0.6483656167984009, 0.6830957531929016, 0.6830957531929016, 0.6730726957321167, 0.5893316268920898, 0.6402474045753479, 0.6975016593933105, 0.6524498462677002, 0.6220090389251709, 0.5655167698860168] - self.assertEqual(doc._.sliding_window_cohesions, - sliding_window_cohesions) - - def test_mean_slider_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_slider_cohesion, - 0.5531269748482788) - - def test_median_slider_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_slider_cohesion, - 0.5628568530082703) - - def test_max_slider_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_slider_cohesion, - 0.8771107196807861) - - def test_min_slider_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_slider_cohesion, - 0.09946548193693161) - - def test_stdev_slider_cohesion(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_slider_cohesion, - 0.12972704854938172) - - def test_num_coref_chains(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.num_corefs, 4) - - def test_mean_coref_chain_len(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_coref_chain_len, 2) - - def test_median_coref_chain_len(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.median_coref_chain_len, 2.0) - - def test_max_coref_chain_len(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_coref_chain_len, 2) - - def test_min_coref_chain_len(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_coref_chain_len, 2) - - def test_stdev_coref_chain_len(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_coref_chain_len, 0.0) - - def test_sentence_count(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.sentence_count, 35) - - def test_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.sentence_lengths, - [18, 19, 15, 16, 27, 19, 17, 22, 22, 37, 11, 19, 10, 17, 14, 26, 19, 23, 27, 15, 18, 20, 23, 16, 14, 23, 20, 20, 14, 13, 31, 17, 20, 29, 33]) - - def test_mean_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sentence_len, - 20.114285714285714) - - def test_median_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sentence_len, 19) - - def test_max_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sentence_len, 37) - - def test_min_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sentence_len, 10) - - def test_stdev_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_sentence_len, - 6.153771815969878) - - def test_sqrt_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.sqrt_sentence_lengths, - [4.242640687119285, 4.358898943540674, 3.872983346207417, 4.0, 5.196152422706632, 4.358898943540674, 4.123105625617661, 4.69041575982343, 4.69041575982343, 6.082762530298219, 
3.3166247903554, 4.358898943540674, 3.1622776601683795, 4.123105625617661, 3.7416573867739413, 5.0990195135927845, 4.358898943540674, 4.795831523312719, 5.196152422706632, 3.872983346207417, 4.242640687119285, 4.47213595499958, 4.795831523312719, 4.0, 3.7416573867739413, 4.795831523312719, 4.47213595499958, 4.47213595499958, 3.7416573867739413, 3.605551275463989, 5.5677643628300215, 4.123105625617661, 4.47213595499958, 5.385164807134504, 5.744562646538029]) - - def test_mean_sqrt_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sqrt_sentence_len, - 4.436401006267681) - - def test_median_sqrt_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sqrt_sentence_len, - 4.358898943540674) - - def test_max_sqrt_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sqrt_sentence_len, - 6.082762530298219) - - def test_min_sqrt_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sqrt_sentence_len, - 3.1622776601683795) - - def test_stdev_sqrt_sentence_lengths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.std_sqrt_sentence_len, - 0.6673502014232635) - - def test_words_to_sentence_root(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.words_before_sentence_root, - [8, 3, 7, 2, 5, 8, 7, 3, 2, 8, 2, 10, 3, 7, 4, 4, 8, 10, 5, 5, 10, 0, 10, 2, 7, 14, 8, 4, 12, 0, 3, 6, 4, 10, 1]) - - def test_mean_words_to_sentence_root(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_words_to_sentence_root, - 5.771428571428571) - - def test_median_words_to_sentence_root(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_words_to_sentence_root, 5) - - def test_max_words_to_sentence_root(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_words_to_sentence_root, 14) - - def test_min_words_to_sentence_root(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_words_to_sentence_root, 0) - - def test_stdev_words_to_sentence_root(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_words_to_sentence_root, - 3.507015777533022) - - def test_syntacticThemeDepths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.syntacticThemeDepths, - [2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 3, 3, 2, 3, 3, 2, 3, 5, 4, 2, 2, 2, 2, 2, 0, 2, 3, 3, 3, 2, 2]) - - def test_meanThemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.meanThemeDepth, 2.28125) - - def test_medianThemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.medianThemeDepth, 2.0) - - def test_maxThemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.maxThemeDepth, 5.0) - - def test_minThemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.minThemeDepth, 0.0) - - def test_stdevThemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdevThemeDepth, - 0.9240295694193397) - - def test_syntacticRhemeDepths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.syntacticRhemeDepths, - [1.0, 2.0, 4.0, 4.0, 3.0, 2.0, 4.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 6.0, 4.0, 3.0, 5.0, 4.0, 2.0, 1.0, 3.0, 3.0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 2.0, 3.0, 4.0, 6.0, 5.0, 7.0, 6.0, 7.0, 9.0, 9.0, 9.0, 8.0, 2.0, 1.0, 2.0, 3.0, 4.0, 3.0, 3.0, 2.0, 3.0, 5.0, 4.0, 6.0, 5.0, 6.0, 7.0, 9.0, 8.0, 10.0, 9.0, 12.0, 11.0, 10.0, 2.0, 1.0, 2.0, 4.0, 3.0, 5.0, 4.0, 5.0, 6.0, 8.0, 7.0, 2.0, 1.0, 2.0, 2.0, 4.0, 3.0, 4.0, 6.0, 6.0, 5.0, 
2.0, 1.0, 3.0, 2.0, 4.0, 4.0, 3.0, 3.0, 5.0, 4.0, 5.0, 7.0, 6.0, 8.0, 8.0, 8.0, 7.0, 8.0, 9.0, 2.0, 1.0, 3.0, 4.0, 4.0, 3.0, 4.0, 5.0, 2.0, 4.0, 3.0, 5.0, 5.0, 5.0, 4.0, 5.0, 6.0, 5.0, 7.0, 6.0, 2.0, 1.0, 3.0, 3.0, 5.0, 4.0, 5.0, 4.0, 5.0, 5.0, 5.0, 4.0, 3.0, 4.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 5.0, 6.0, 7.0, 7.0, 6.0, 7.0, 6.0, 2.0, 1.0, 3.0, 2.0, 2.0, 3.0, 6.0, 5.0, 4.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 5.0, 4.0, 2.0, 1.0, 3.0, 3.0, 2.0, 3.0, 4.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 3.0, 2.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 4.0, 4.0, 4.0, 3.0, 4.0, 5.0, 2.0, 1.0, 2.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 4.0, 4.0, 4.0, 3.0, 4.0, 5.0, 2.0, 1.0, 3.0, 2.0, 3.0, 5.0, 4.0, 5.0, 7.0, 7.0, 6.0, 2.0, 1.0, 3.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 7.0, 7.0, 6.0, 5.0, 2.0, 1.0, 3.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 6.0, 2.0, 2.0, 5.0, 4.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0, 6.0, 2.0, 1.0, 3.0, 3.0, 2.0, 3.0, 5.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 3.0, 2.0, 4.0, 4.0, 3.0, 2.0, 1.0, 3.0, 4.0, 4.0, 3.0, 4.0, 4.0, 3.0, 4.0, 6.0, 5.0, 3.0, 2.0, 3.0, 5.0, 5.0, 4.0, 5.0, 6.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 3.0, 2.0, 4.0, 5.0, 4.0, 4.0, 3.0, 2.0, 1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 4.0, 3.0, 4.0, 4.0, 6.0, 6.0, 5.0, 2.0, 1.0, 2.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0, 7.0, 6.0, 5.0, 4.0, 2.0, 1.0, 3.0, 3.0, 3.0, 2.0, 3.0, 5.0, 4.0, 2.0, 4.0, 3.0, 2.0, 1.0, 3.0, 3.0, 2.0, 3.0, 5.0, 5.0, 5.0, 4.0, 5.0, 5.0, 7.0, 6.0, 7.0, 8.0, 2.0, 1.0, 2.0, 1.0, 3.0, 4.0, 4.0, 4.0, 3.0, 2.0, 4.0, 4.0, 3.0, 4.0, 5.0, 2.0, 1.0, 3.0, 3.0, 2.0, 2.0, 4.0, 3.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 4.0, 5.0, 2.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 5.0, 4.0, 3.0, 4.0, 5.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 2.0, 2.0, 2.0, 4.0, 3.0, 4.0, 5.0, 4.0, 6.0, 5.0, 6.0, 7.0, 2.0, 1.0, 3.0, 2.0, 3.0, 5.0, 4.0, 5.0, 6.0, 5.0, 6.0, 5.0, 6.0, 5.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 4.0, 6.0, 6.0, 5.0, 4.0, 6.0, 5.0, 2.0, 2.0, 3.0, 1.0, 3.0, 3.0, 2.0, 2.0, 
4.0, 3.0, 5.0, 5.0, 4.0, 5.0, 7.0, 6.0, 7.0, 9.0, 8.0, 2.0]) - - def test_meanRhemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.meanRhemeDepth, - 3.733067729083665) - - def test_medianRhemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.medianRhemeDepth, 3.0) - - def test_maxRhemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.maxRhemeDepth, 12.0) - - def test_minRhemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.minRhemeDepth, 1.0) - - def test_stdevThemeDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdevRhemeDepth, - 1.8765445139321595) - - def test_weightedSyntacticDepths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.weightedSyntacticDepths, - [5.0, 3.0, 5.0, 6.0, 8.0, 6.0, 9.0, 7.0, 1.0, 2.0, 5.0, 5.0, 3.0, 2.0, 5.0, 5.0, 3.0, 2.0, 3.0, 3.0, 3.0, 1.0, 4.0, 2.0, 3.0, 4.0, 6.0, 8.0, 8.0, 6.0, 7.0, 8.0, 6.0, 4.0, 7.0, 5.0, 2.0, 5.0, 5.0, 3.0, 4.0, 7.0, 7.0, 5.0, 1.0, 4.0, 4.0, 4.0, 2.0, 5.0, 3.0, 2.0, 5.0, 3.0, 1.0, 2.0, 3.0, 4.0, 7.0, 5.0, 9.0, 7.0, 8.0, 11.0, 11.0, 11.0, 9.0, 2.0, 3.0, 6.0, 4.0, 3.0, 3.0, 1.0, 2.0, 4.0, 6.0, 4.0, 4.0, 2.0, 3.0, 6.0, 4.0, 8.0, 6.0, 7.0, 8.0, 11.0, 9.0, 3.0, 1.0, 6.0, 4.0, 2.0, 2.0, 3.0, 4.0, 5.0, 3.0, 6.0, 3.0, 3.0, 3.0, 1.0, 2.0, 5.0, 3.0, 7.0, 5.0, 6.0, 8.0, 11.0, 9.0, 2.0, 3.0, 4.0, 7.0, 7.0, 5.0, 3.0, 3.0, 1.0, 2.0, 2.0, 5.0, 3.0, 4.0, 7.0, 7.0, 5.0, 2.0, 3.0, 5.0, 3.0, 1.0, 4.0, 2.0, 5.0, 5.0, 3.0, 3.0, 6.0, 4.0, 5.0, 8.0, 6.0, 10.0, 10.0, 10.0, 8.0, 9.0, 10.0, 2.0, 5.0, 3.0, 1.0, 4.0, 6.0, 6.0, 4.0, 5.0, 6.0, 2.0, 5.0, 3.0, 7.0, 7.0, 7.0, 5.0, 6.0, 8.0, 6.0, 9.0, 7.0, 2.0, 3.0, 4.0, 4.0, 7.0, 5.0, 3.0, 3.0, 3.0, 1.0, 4.0, 4.0, 7.0, 5.0, 6.0, 5.0, 6.0, 6.0, 7.0, 5.0, 4.0, 6.0, 6.0, 4.0, 4.0, 2.0, 5.0, 3.0, 6.0, 4.0, 5.0, 6.0, 7.0, 8.0, 6.0, 7.0, 6.0, 
2.0, 3.0, 3.0, 1.0, 4.0, 2.0, 2.0, 3.0, 8.0, 6.0, 4.0, 2.0, 3.0, 3.0, 3.0, 5.0, 3.0, 4.0, 5.0, 3.0, 3.0, 3.0, 1.0, 4.0, 2.0, 6.0, 4.0, 7.0, 7.0, 5.0, 2.0, 5.0, 5.0, 3.0, 1.0, 4.0, 4.0, 2.0, 3.0, 4.0, 2.0, 5.0, 3.0, 4.0, 3.0, 4.0, 3.0, 5.0, 1.0, 4.0, 2.0, 4.0, 4.0, 4.0, 2.0, 5.0, 3.0, 2.0, 5.0, 3.0, 3.0, 3.0, 1.0, 4.0, 2.0, 6.0, 6.0, 6.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 3.0, 3.0, 1.0, 2.0, 4.0, 4.0, 5.0, 2.0, 5.0, 3.0, 2.0, 2.0, 3.0, 3.0, 1.0, 4.0, 2.0, 6.0, 6.0, 6.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 3.0, 5.0, 3.0, 4.0, 5.0, 3.0, 1.0, 4.0, 2.0, 3.0, 6.0, 4.0, 5.0, 8.0, 8.0, 6.0, 2.0, 3.0, 6.0, 6.0, 4.0, 5.0, 8.0, 6.0, 3.0, 5.0, 3.0, 1.0, 4.0, 2.0, 3.0, 4.0, 7.0, 7.0, 8.0, 8.0, 9.0, 7.0, 5.0, 2.0, 3.0, 3.0, 4.0, 5.0, 3.0, 1.0, 4.0, 4.0, 2.0, 6.0, 4.0, 5.0, 5.0, 6.0, 7.0, 2.0, 2.0, 7.0, 5.0, 5.0, 3.0, 6.0, 4.0, 7.0, 5.0, 6.0, 2.0, 3.0, 4.0, 7.0, 7.0, 5.0, 1.0, 4.0, 4.0, 2.0, 3.0, 6.0, 4.0, 5.0, 4.0, 2.0, 3.0, 3.0, 4.0, 5.0, 8.0, 6.0, 3.0, 3.0, 3.0, 3.0, 1.0, 4.0, 4.0, 2.0, 5.0, 5.0, 3.0, 2.0, 1.0, 4.0, 6.0, 6.0, 4.0, 5.0, 6.0, 4.0, 5.0, 8.0, 6.0, 4.0, 2.0, 3.0, 6.0, 6.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 7.0, 7.0, 7.0, 5.0, 6.0, 9.0, 7.0, 4.0, 1.0, 2.0, 2.0, 5.0, 5.0, 3.0, 2.0, 5.0, 7.0, 5.0, 5.0, 3.0, 2.0, 5.0, 3.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 6.0, 4.0, 5.0, 5.0, 8.0, 8.0, 6.0, 2.0, 3.0, 6.0, 6.0, 4.0, 3.0, 3.0, 3.0, 1.0, 2.0, 5.0, 7.0, 5.0, 3.0, 2.0, 5.0, 3.0, 4.0, 5.0, 6.0, 9.0, 7.0, 8.0, 11.0, 11.0, 11.0, 11.0, 9.0, 10.0, 1.0, 4.0, 2.0, 3.0, 10.0, 8.0, 6.0, 4.0, 2.0, 5.0, 5.0, 3.0, 4.0, 5.0, 6.0, 5.0, 3.0, 1.0, 4.0, 4.0, 4.0, 2.0, 3.0, 6.0, 4.0, 2.0, 5.0, 3.0, 2.0, 3.0, 5.0, 5.0, 3.0, 1.0, 4.0, 4.0, 2.0, 3.0, 3.0, 3.0, 3.0, 1.0, 2.0, 2.0, 5.0, 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 7.0, 5.0, 6.0, 7.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 1.0, 2.0, 1.0, 4.0, 6.0, 6.0, 6.0, 4.0, 2.0, 5.0, 5.0, 3.0, 4.0, 5.0, 2.0, 5.0, 3.0, 3.0, 1.0, 4.0, 4.0, 2.0, 2.0, 5.0, 3.0, 2.0, 2.0, 5.0, 3.0, 6.0, 4.0, 5.0, 6.0, 3.0, 7.0, 5.0, 3.0, 1.0, 4.0, 2.0, 7.0, 5.0, 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 3.0, 6.0, 
3.0, 3.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 1.0, 4.0, 4.0, 2.0, 2.0, 2.0, 5.0, 3.0, 4.0, 5.0, 4.0, 7.0, 5.0, 6.0, 7.0, 2.0, 3.0, 6.0, 4.0, 5.0, 8.0, 8.0, 6.0, 3.0, 3.0, 3.0, 1.0, 4.0, 2.0, 3.0, 6.0, 4.0, 5.0, 6.0, 5.0, 7.0, 5.0, 6.0, 5.0, 2.0, 4.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 4.0, 2.0, 6.0, 4.0, 5.0, 8.0, 8.0, 6.0, 5.0, 8.0, 6.0, 2.0, 2.0, 3.0, 1.0, 4.0, 4.0, 2.0, 2.0, 5.0, 3.0, 7.0, 7.0, 5.0, 6.0, 9.0, 7.0, 8.0, 11.0, 9.0, 2.0]) - - def test_meanWeightedDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.meanWeightedDepth, - 4.353693181818182) - - def test_medianWeightedDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.medianWeightedDepth, 4.0) - - def test_maxWeightedDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.maxWeightedDepth, 11.0) - - def test_minWeightedDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.minWeightedDepth, 1.0) - - def test_stdevWeightedDepth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdevWeightedDepth, - 2.1464617078006403) - - def test_weightedSyntacticBreadths(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.weightedSyntacticBreadths, - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]) - - def test_meanWeightedBreadth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.meanWeightedBreadth, - 1.3053977272727273) - - def test_medianWeightedBreadth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.medianWeightedBreadth, 1.0) - - def test_maxThemeBreadth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.maxWeightedBreadth, 3.0) - - def test_minWeightedBreadth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.minWeightedBreadth, 1.0) - - def test_stdevWeightedBreadth(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdevWeightedBreadth, - 0.5078886772161095) - - def test_syntacticProfile(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - profile = {'DET': 76, 'DET:Definite=Def': 47, 'DET:PronType=Art': 64, 'DET-det-NOUN': 75, 'NOUN': 189, 'NOUN:Number=Sing': 134, 'NOUN-nsubj-VERB': 35, 'VERB': 78, 'VERB:Aspect=Prog': 11, 'VERB:Tense=Pres': 33, 
'VERB:VerbForm=Part': 28, 'VERB-acl-NOUN': 3, 'NOUN-dobj-VERB': 34, 'ADV': 23, 'ADV-advmod-ADP': 2, 'ADP': 81, 'ADP-prep-VERB': 32, 'ADJ': 62, 'ADJ:Degree=Pos': 60, 'ADJ-amod-NOUN': 54, 'NOUN-pobj-ADP': 71, 'VERB:Number=Sing': 17, 'VERB:Person=3': 17, 'VERB:VerbForm=Fin': 25, 'VERB-ROOT-VERB': 29, 'PUNCT': 69, 'PUNCT:PunctType=Peri': 35, 'PUNCT-punct-VERB': 47, 'ADV-advmod-AUX': 1, 'PRON': 22, 'PRON-expl-AUX': 2, 'AUX': 36, 'AUX-HAVE:Mood=Ind': 7, 'AUX-HAVE:Number=Sing': 6, 'AUX-HAVE:Person=3': 6, 'AUX-HAVE:Tense=Pres': 7, 'AUX-HAVE:VerbForm=Fin': 8, 'AUX-aux-AUX': 4, 'AUX-BE:Tense=Past': 6, 'AUX-BE:VerbForm=Part': 2, 'AUX-ROOT-AUX': 5, 'NOUN-attr-AUX': 2, 'ADP-prep-NOUN': 40, 'SCONJ': 17, 'SCONJ-advmod-VERB': 6, 'VERB:Aspect=Perf': 17, 'VERB:Tense=Past': 20, 'VERB-amod-NOUN': 6, 'NOUN:Number=Plur': 55, 'AUX-aux-VERB': 20, 'VERB-relcl-NOUN': 11, 'ADV:Degree=Cmp': 3, 'ADV-advmod-ADV': 2, 'ADV-advmod-VERB': 13, 'PUNCT-punct-AUX': 8, 'DET:Definite=Ind': 17, 'VERB-ccomp-VERB': 7, 'ADJ:Degree=Sup': 2, 'PRON:PronType=Rel': 9, 'PRON-nsubj-VERB': 9, 'NOUN-compound-NOUN': 18, 'ADP-prep-AUX': 2, 'PUNCT:PunctType=Comm': 31, 'NOUN-nsubj-AUX': 2, 'AUX-BE:Mood=Ind': 9, 'AUX-BE:Tense=Pres': 6, 'AUX-BE:VerbForm=Fin': 10, 'ADJ-acomp-AUX': 3, 'SCONJ-mark-VERB': 9, 'AUX-MODAL:VerbForm=Fin': 11, 'VERB:VerbForm=Inf': 25, 'VERB-ccomp-AUX': 2, 'PRON-nsubj-AUX': 3, 'AUX-relcl-NOUN': 2, 'NUM': 2, 'NUM:NumType=Card': 2, 'NUM-nummod-NOUN': 1, 'NOUN-npadvmod-ADV': 1, 'ADV-acomp-AUX': 1, 'SCONJ-prep-VERB': 2, 'VERB-pcomp-SCONJ': 1, 'NOUN-nsubjpass-VERB': 4, 'AUX-BE:VerbForm=Inf': 3, 'AUX-auxpass-VERB': 5, 'ADP-prep-PRON': 1, 'DET:Number=Plur': 3, 'DET:PronType=Dem': 9, 'AUX-HAVE:VerbForm=Inf': 1, 'SPACE': 5, 'SPACE-dep-VERB': 5, 'PART': 17, 'PART-aux-VERB': 11, 'VERB-xcomp-VERB': 3, 'PRON:Case=Acc': 4, 'PRON:Number=Plur': 5, 'PRON:Person=3': 4, 'PRON:PronType=Prs': 8, 'PRON:Reflex=Yes': 3, 'PRON-pobj-ADP': 2, 'AUX-BE:Number=Sing': 4, 'AUX-BE:Person=3': 4, 'VERB-advcl-VERB': 6, 
'PUNCT-punct-NOUN': 10, 'NOUN-conj-NOUN': 10, 'CCONJ': 14, 'CCONJ:ConjType=Cmp': 14, 'CCONJ-cc-NOUN': 7, 'NOUN-nmod-NOUN': 2, 'NOUN-ROOT-NOUN': 1, 'DET:Number=Sing': 6, 'NOUN-appos-NOUN': 1, 'VERB-pcomp-ADP': 3, 'NOUN-nsubj-ADJ': 2, 'ADJ-ccomp-VERB': 2, 'PART:Polarity=Neg': 5, 'PART-neg-VERB': 3, 'PRON-appos-NOUN': 1, 'PART-preconj-VERB': 1, 'ADV-advmod-PART': 1, 'CCONJ-cc-VERB': 5, 'PRON:Gender=Neut': 1, 'PRON:Number=Sing': 2, 'VERB-conj-VERB': 5, 'NOUN-attr-VERB': 2, 'ADP-prep-ADJ': 3, 'PUNCT:PunctType=Dash': 3, 'PART-neg-AUX': 1, 'ADJ-amod-ADP': 1, 'ADJ-compound-NOUN': 1, 'PRON:Case=Nom': 1, 'PRON:Person=1': 3, 'PROPN': 13, 'PROPN:Number=Sing': 13, 'PROPN-nsubj-VERB': 3, 'PUNCT-punct-PROPN': 4, 'NOUN-appos-PROPN': 1, 'PART-prep-VERB': 1, 'NOUN-pobj-PART': 1, 'ADV-advmod-ADJ': 2, 'PRON-nsubjpass-VERB': 1, 'ADP-prt-VERB': 2, 'ADV:Degree=Sup': 1, 'PROPN-compound-PROPN': 6, 'NUM-appos-PROPN': 1, 'ADP-prep-NUM': 1, 'DET-det-PROPN': 1, 'PROPN-pobj-ADP': 3, 'PRON:Gender=Masc': 1, 'PRON:Poss=Yes': 2, 'PRON-poss-NOUN': 2, 'AUX-HAVE:Tense=Past': 1, 'NOUN-pobj-SCONJ': 1, 'VERB-prep-AUX': 1, 'NOUN-pobj-VERB': 1, 'CCONJ-preconj-ADJ': 1, 'CCONJ-cc-ADJ': 1, 'ADJ-conj-ADJ': 1, 'VERB-advcl-AUX': 1, 'PRON-expl-VERB': 1, 'VERB:Mood=Ind': 1, 'PROPN-compound-NOUN': 1, 'ADV-advmod-CCONJ': 1, 'PRON-dobj-VERB': 1} - self.assertEqual(doc._.syntacticProfile, - profile) - - def test_syntacticProfileNormed(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - profileKeys = ['DET', 'DET:Definite=Def', 'DET:PronType=Art', 'DET-det-NOUN', 'NOUN', 'NOUN:Number=Sing', 'NOUN-nsubj-VERB', 'VERB', 'VERB:Aspect=Prog', 'VERB:Tense=Pres', 'VERB:VerbForm=Part', 'VERB-acl-NOUN', 'NOUN-dobj-VERB', 'ADV', 'ADV-advmod-ADP', 'ADP', 'ADP-prep-VERB', 'ADJ', 'ADJ:Degree=Pos', 'ADJ-amod-NOUN', 'NOUN-pobj-ADP', 'VERB:Number=Sing', 'VERB:Person=3', 'VERB:VerbForm=Fin', 'VERB-ROOT-VERB', 'PUNCT', 'PUNCT:PunctType=Peri', 'PUNCT-punct-VERB', 'ADV-advmod-AUX', 'PRON', 'PRON-expl-AUX', 'AUX', 
'AUX-HAVE:Mood=Ind', 'AUX-HAVE:Number=Sing', 'AUX-HAVE:Person=3', 'AUX-HAVE:Tense=Pres', 'AUX-HAVE:VerbForm=Fin', 'AUX-aux-AUX', 'AUX-BE:Tense=Past', 'AUX-BE:VerbForm=Part', 'AUX-ROOT-AUX', 'NOUN-attr-AUX', 'ADP-prep-NOUN', 'SCONJ', 'SCONJ-advmod-VERB', 'VERB:Aspect=Perf', 'VERB:Tense=Past', 'VERB-amod-NOUN', 'NOUN:Number=Plur', 'AUX-aux-VERB', 'VERB-relcl-NOUN', 'ADV:Degree=Cmp', 'ADV-advmod-ADV', 'ADV-advmod-VERB', 'PUNCT-punct-AUX', 'DET:Definite=Ind', 'VERB-ccomp-VERB', 'ADJ:Degree=Sup', 'PRON:PronType=Rel', 'PRON-nsubj-VERB', 'NOUN-compound-NOUN', 'ADP-prep-AUX', 'PUNCT:PunctType=Comm', 'NOUN-nsubj-AUX', 'AUX-BE:Mood=Ind', 'AUX-BE:Tense=Pres', 'AUX-BE:VerbForm=Fin', 'ADJ-acomp-AUX', 'SCONJ-mark-VERB', 'AUX-MODAL:VerbForm=Fin', 'VERB:VerbForm=Inf', 'VERB-ccomp-AUX', 'PRON-nsubj-AUX', 'AUX-relcl-NOUN', 'NUM', 'NUM:NumType=Card', 'NUM-nummod-NOUN', 'NOUN-npadvmod-ADV', 'ADV-acomp-AUX', 'SCONJ-prep-VERB', 'VERB-pcomp-SCONJ', 'NOUN-nsubjpass-VERB', 'AUX-BE:VerbForm=Inf', 'AUX-auxpass-VERB', 'ADP-prep-PRON', 'DET:Number=Plur', 'DET:PronType=Dem', 'AUX-HAVE:VerbForm=Inf', 'SPACE', 'SPACE-dep-VERB', 'PART', 'PART-aux-VERB', 'VERB-xcomp-VERB', 'PRON:Case=Acc', 'PRON:Number=Plur', 'PRON:Person=3', 'PRON:PronType=Prs', 'PRON:Reflex=Yes', 'PRON-pobj-ADP', 'AUX-BE:Number=Sing', 'AUX-BE:Person=3', 'VERB-advcl-VERB', 'PUNCT-punct-NOUN', 'NOUN-conj-NOUN', 'CCONJ', 'CCONJ:ConjType=Cmp', 'CCONJ-cc-NOUN', 'NOUN-nmod-NOUN', 'NOUN-ROOT-NOUN', 'DET:Number=Sing', 'NOUN-appos-NOUN', 'VERB-pcomp-ADP', 'NOUN-nsubj-ADJ', 'ADJ-ccomp-VERB', 'PART:Polarity=Neg', 'PART-neg-VERB', 'PRON-appos-NOUN', 'PART-preconj-VERB', 'ADV-advmod-PART', 'CCONJ-cc-VERB', 'PRON:Gender=Neut', 'PRON:Number=Sing', 'VERB-conj-VERB', 'NOUN-attr-VERB', 'ADP-prep-ADJ', 'PUNCT:PunctType=Dash', 'PART-neg-AUX', 'ADJ-amod-ADP', 'ADJ-compound-NOUN', 'PRON:Case=Nom', 'PRON:Person=1', 'PROPN', 'PROPN:Number=Sing', 'PROPN-nsubj-VERB', 'PUNCT-punct-PROPN', 'NOUN-appos-PROPN', 'PART-prep-VERB', 'NOUN-pobj-PART', 
'ADV-advmod-ADJ', 'PRON-nsubjpass-VERB', 'ADP-prt-VERB', 'ADV:Degree=Sup', 'PROPN-compound-PROPN', 'NUM-appos-PROPN', 'ADP-prep-NUM', 'DET-det-PROPN', 'PROPN-pobj-ADP', 'PRON:Gender=Masc', 'PRON:Poss=Yes', 'PRON-poss-NOUN', 'AUX-HAVE:Tense=Past', 'NOUN-pobj-SCONJ', 'VERB-prep-AUX', 'NOUN-pobj-VERB', 'CCONJ-preconj-ADJ', 'CCONJ-cc-ADJ', 'ADJ-conj-ADJ', 'VERB-advcl-AUX', 'PRON-expl-VERB', 'VERB:Mood=Ind', 'PROPN-compound-NOUN', 'ADV-advmod-CCONJ', 'PRON-dobj-VERB'] - profileValues = [1.6097160165462159e-06, 1.5876644820403395e-06, 1.6097160077938598e-06, 1.6097160165462159e-06, 0.11734772195100875, 0.1111685316507911, 5.7155921353452216e-05, 0.00017147511446523268, 1.5762725441930697e-47, 0.000171467764061607, 1.9698103109370564e-29, 1.0502827114718416e-13, 2.6461074700672327e-07, 3.6751492639822676e-09, 4.344777660469962e-56, 0.01954811895812287, 0.0010296004396043517, 0.03909473845471586, 0.03909473845471586, 0.03909473845471586, 0.11729030141889078, 0.000171467764061607, 0.000171467764061607, 0.000171467764061607, 1.2503365612760016e-15, 0.33333333374168345, 0.33333333333333354, 0.3333333337416833, 0.0, 1.905197378511503e-05, 2.5528741627242305e-25, 2.0422993301795482e-24, 2.934975251537704e-91, 2.934975251537704e-91, 2.934975251537704e-91, 2.934975251537704e-91, 3.724946553900859e-61, 5.105748325448461e-25, 8.624654039179588e-42, 2.0644014691841255e-281, 1.5317244976345382e-24, 9.19034698580723e-24, 0.018518518518518517, 2.8942975955463003e-18, 5.544342507309039e-68, 1.9698103109370564e-29, 1.9698103109370564e-29, 4.1431205309012577e-112, 0.006179190300217641, 1.6288530555859902e-37, 0.00017146776406035664, 1.0151057579821045e-122, 5.764954127036667e-212, 4.8871624456674475e-37, 2.093541928296115e-16, 2.205152575352049e-08, 1.1975779815782083e-65, 1.3975504996565956e-81, 1.9051973784484073e-05, 5.7885951910926005e-18, 7.093227575035193e-46, 2.477281763020949e-280, 4.08349919188988e-10, 1.3915246613768347e-265, 8.624654039179588e-42, 3.0241575045854284e-165, 
8.624654039179588e-42, 4.536236256878146e-164, 2.8942975955463003e-18, 5.105748325450092e-25, 7.350403625698821e-09, 2.103985288001771e-261, 1.0080525015284763e-165, 3.0241575045854284e-165, 2.6421889416752313e-104, 2.6421889416752313e-104, 6.938256552387698e-296, 2.081476965716311e-295, 6.244430897148933e-295, 2.68196151880862e-58, 7.49331707657872e-294, 2.8748846797265295e-42, 1.5317244976345382e-24, 8.624654039179588e-42, 3.185804736395259e-284, 3.2143225767887714e-156, 3.104122128250716e-62, 6.881338230613748e-282, 2.714755092643318e-38, 2.714755092643318e-38, 5.2548867144738624e-14, 5.2548867144738624e-14, 8.108397861075463e-51, 5.7885951910926005e-18, 5.7885951910926005e-18, 1.4792876071535965e-98, 5.7885951910926005e-18, 1.210314497464257e-196, 6.778036433725769e-205, 8.61827013537561e-42, 8.61827013537561e-42, 1.5762725441930697e-47, 3.970229897868724e-21, 9.783250838459017e-92, 1.2250497547798627e-09, 1.2250497547798627e-09, 3.261083612819672e-92, 1.3548775895946469e-173, 2.48352791178414e-237, 3.104122128250716e-62, 2.980233494140968e-236, 4.694998230970349e-75, 9.730077433290555e-50, 2.9190232299871665e-49, 1.5120787522927123e-164, 3.137135159858683e-193, 2.2241335366653768e-215, 2.767065593704205e-200, 8.301196781112616e-200, 1.225049754660756e-09, 1.2103144906862195e-196, 1.4792876071535965e-98, 7.350298527964535e-09, 1.7504711857864028e-14, 8.757069689961499e-49, 1.1200623996644071e-166, 1.5120787522927123e-164, 9.920748693792508e-159, 3.3326096476146002e-152, 6.996880607359798e-147, 5.7885951910926005e-18, 2.52067850753242e-12, 2.52067850753242e-12, 5.429510185286636e-38, 1.2327484799244677e-99, 8.058236787314125e-129, 2.3497818471807976e-125, 4.229607324925436e-124, 1.2081339468108072e-107, 3.196852261497882e-117, 1.5959759509954837e-43, 1.2081339468108052e-107, 3.1618488384815892e-86, 2.6421889416752313e-104, 7.926566825025699e-104, 3.804752076012338e-102, 9.48554651544475e-86, 1.4792876071535965e-98, 2.347348180031211e-74, 2.347348180031211e-74, 
3.724946553900859e-61, 1.6091769112851713e-57, 1.9698103107440054e-29, 3.9396206214880114e-28, 1.1910689693606172e-20, 1.0719620724245555e-19, 3.2158862172736668e-19, 6.946314229311121e-17, 6.251682806380008e-16, 1.2503365612760016e-15, 2.52067850753242e-12, 3.6751492639822676e-09, 1.9051973784484073e-05] - self.assertEqual(list(doc._.syntacticProfileNormed.keys()), - profileKeys) - self.assertEqual(list(doc._.syntacticProfileNormed.values()), - profileValues) - - def test_syntacticVariety(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.syntacticVariety, 163) - - def test_pastTenseScope(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - pastTenseScope = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.pastTenseScope, - pastTenseScope) - - def test_propn_past(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_past, - 0.3252840909090909) - - doc = holmes_manager.get_document('GRE_Sample_Essay') diff --git a/tests/old_tests/test_viewpoint_perspective_features.py b/tests/old_tests/test_viewpoint_perspective_features.py deleted file mode 100644 index 3616fb2..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="The statement linking technology negatively with free 
thinking plays on recent human experience over the past century. Surely there has been no time in history where the lived lives of people have changed more dramatically. A quick reflection on a typical day reveals how technology has revolutionized the world. Most people commute to work in an automobile that runs on an internal combustion engine. During the workday, chances are high that the employee will interact with a computer that processes information on silicon bridges that are .09 microns wide. Upon leaving home, family members will be reached through wireless networks that utilize satellites orbiting the earth. Each of these common occurrences could have been inconceivable at the turn of the 19th century.\n\nThe statement attempts to bridge these dramatic changes to a reduction in the ability for humans to think for themselves. The assumption is that an increased reliance on technology negates the need for people to think creatively to solve previous quandaries. Looking back at the introduction, one could argue that without a car, computer, or mobile phone, the hypothetical worker would need to find alternate methods of transport, information processing and communication. Technology short circuits this thinking by making the problems obsolete.\n\nHowever, this reliance on technology does not necessarily preclude the creativity that marks the human species. The prior examples reveal that technology allows for convenience. The car, computer and phone all release additional time for people to live more efficiently. This efficiency does not preclude the need for humans to think for themselves. In fact, technology frees humanity to not only tackle new problems, but may itself create new issues that did not exist without technology. For example, the proliferation of automobiles has introduced a need for fuel conservation on a global scale. 
With increasing energy demands from emerging markets, global warming becomes a concern inconceivable to the horse-and-buggy generation. Likewise dependence on oil has created nation-states that are not dependent on taxation, allowing ruling parties to oppress minority groups such as women. Solutions to these complex problems require the unfettered imaginations of maverick scientists and politicians.\n\nIn contrast to the statement, we can even see how technology frees the human imagination. Consider how the digital revolution and the advent of the internet has allowed for an unprecedented exchange of ideas. WebMD, a popular internet portal for medical information, permits patients to self research symptoms for a more informed doctor visit. This exercise opens pathways of thinking that were previously closed off to the medical layman. With increased interdisciplinary interactions, inspiration can arrive from the most surprising corners. Jeffrey Sachs, one of the architects of the UN Millenium Development Goals, based his ideas on emergency care triage techniques. The unlikely marriage of economics and medicine has healed tense, hyperinflation environments from South America to Eastern Europe.\n\nThis last example provides the most hope in how technology actually provides hope to the future of humanity. By increasing our reliance on technology, impossible goals can now be achieved. Consider how the late 20th century witnessed the complete elimination of smallpox. This disease had ravaged the human race since prehistorical days, and yet with the technology of vaccines, free thinking humans dared to imagine a world free of smallpox. Using technology, battle plans were drawn out, and smallpox was systematically targeted and eradicated.\n\nTechnology will always mark the human experience, from the discovery of fire to the implementation of nanotechnology. 
Given the history of the human race, there will be no limit to the number of problems, both new and old, for us to tackle. There is no need to retreat to a Luddite attitude to new things, but rather embrace a hopeful posture to the possibilities that technology provides for new avenues of human imagination.", label='GRE_Sample_Essay') - - -class ViewpointPerspectiveFeatureTest(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - perspective_spans = {'implicit': {'8': [0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], '2': [2, 3, 4, 5, 7], '6': [6], '21': [18, 19, 20, 21, 22, 23, 24, 25, 36], '33': [26, 27, 28, 29, 30, 31, 32, 33, 34, 35], '44': [37, 38, 39, 40, 41, 42, 43, 44, 51], '48': [45, 46, 47, 48, 49, 50], '54': [52, 53, 54, 55, 56, 57, 58, 59, 67], '61': [60, 61, 62, 63, 64, 65, 66], '73': [68, 69, 70, 71, 72, 73, 74, 94], '79': [75, 76, 77, 78, 79, 80, 81, 82], '84': [83, 84, 85, 86, 87, 88], '90': [89, 90, 91, 92, 93], '103': [95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 113], '108': [107, 108, 109], '110': [110, 111, 112], '121': [114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130], '134': [131, 132, 133, 134, 135, 136, 137, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152], '138': [138], '155': [153, 154, 155, 174], '162': [156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173], '214': [212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222], '233': [223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 241], '237': [236, 237, 238, 239, 240], '245': [242, 244, 245, 251], '243': [243], '248': [246, 247, 248, 249, 250], '259': [252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268], '273': [269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], '287': [283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296], '299': [297, 298, 299, 300, 
301, 308], '305': [302, 303, 304, 305, 306, 307], '317': [309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327], '338': [328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 350], '341': [341, 342, 343, 344, 345, 346, 347, 348, 349], '356': [351, 352, 353, 354, 355, 356, 357, 358, 359, 366, 367, 368, 369, 370, 371, 372, 373, 375, 376, 377], '361': [360, 361, 362, 363, 364, 365], '374': [374], '383': [378, 379, 380, 382, 383, 384, 386, 387, 388, 389, 390, 391, 392], '381': [381], '385': [385], '411': [411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 427, 428, 429, 430], '426': [426], '441': [431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453], '456': [454, 455, 456, 457, 458, 459, 469], '463': [460, 461, 462, 463, 464, 465, 466, 467, 468], '477': [470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 482, 483], '481': [480, 481], '515': [507, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526], '508': [508], '531': [527, 528, 530, 531, 532, 534, 535, 546], '529': [529], '533': [533], '539': [536, 537, 538, 539, 540, 541, 542, 543, 544, 545], '559': [547, 548, 550, 551, 552, 553, 555, 556, 557, 558, 559, 560], '554': [554], '561': [561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573], '577': [574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585], '596': [586, 587, 588, 589, 590, 591, 592, 594, 595, 596, 597, 598, 604], '593': [593], '601': [599, 600, 601, 602, 603], '611': [605, 606, 607, 608, 609, 610, 611, 612, 613, 614], '618': [615, 616, 617, 618, 619, 620, 621], '626': [622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641], '652': [642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 668, 669, 670], '672': [671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 
681, 682, 683, 684, 685, 686, 687, 688, 690, 691, 692, 693, 703], '689': [689], '696': [694, 695, 696, 697, 698, 699, 700, 701, 702]}, 'explicit_1': [393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 549, 667], 'explicit_2': [], 'explicit_3': {181: [175, 176, 177, 178, 179, 180, 181, 182, 183, 211], 197: [184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210], 485: [484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506]}} - self.assertEqual(doc._.vwp_perspective_spans, perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - stance_markers = {'implicit': {'8': [1, 8], '2': [4], '6': [6], '21': [18], '33': [35], '44': [39, 44], '42': [42], '73': [72], '117': [117], '121': [119, 122], '134': [133, 134, 149], '138': [138], '155': [154], '162': [159, 168, 169, 173], '214': [216, 220, 221], '233': [227, 232, 235], '245': [244, 245], '248': [250], '259': [267], '273': [270, 279], '287': [283, 284, 289, 291, 294, 296], '299': [297, 301], '317': [309, 317, 319], '338': [331, 340], '341': [341], '356': [351, 375], '381': [381], '383': [382, 388], '385': [385], '411': [411, 412], '426': [426], '477': [476], '481': [481], '508': [508], '531': [531, 534], '533': [533], '539': [538, 540], '559': [550, 555, 556], '554': [554], '561': [561], '577': [585], '593': [593], '596': [598], '601': [601], '611': [609], '626': [625, 633, 638], '652': [659], '672': [674, 687, 690, 693], '689': [689]}, 'explicit_1': [401, 402, 409], 'explicit_2': [], 'explicit_3': {181: [175, 182], 197: [196, 198], 485: [503]}} - self.assertEqual(doc._.vwp_stance_markers, stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_egocentric, 0.4431818181818182) - - def 
test_propn_allocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_allocentric, 0.08522727272727272) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - propositional_attitudes = {'implicit': [[[18, 36], None, 21], [[37, 51], 39, 44], [[68, 94], 72, 73], [[114, 130], 114, 121], [[223, 241], 227, 233], [[283, 308], 286, 287], [[351, 377], 352, 356], [[411, 430], None, 411], [[470, 483], 475, 477], [[547, 560], 555, 559], [[561, 573], None, 561], [[622, 641], 623, 626], [[642, 670], None, 652], [[671, 703], None, 672]], 'implicit_3': [[[0, 17], 1, 8], [[131, 152], 133, 134], [[153, 174], 154, 155], [[175, 211], 181, 175], [[212, 222], None, 214], [[212, 222], None, 218], [[242, 251], 244, 245], [[269, 282], 270, 273], [[378, 392], None, 378], [[393, 410], None, 395], [[527, 546], 530, 531], [[527, 546], None, 534], [[671, 703], None, 687]], 'explicit_1': [[[393, 410], 400, 403]], 'explicit_2': [], 'explicit_3': {181: [[[175, 211], 181, 183]], 595: [[[586, 604], 595, 598]]}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotional_states(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - emotion_states = {'explicit_1': [], - 'explicit_2': [], - 'explicit_3': {'Worker': [199], - 'Humans': [596]}} - self.assertEqual(doc._.vwp_emotion_states, emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - # No character traits detected in this essay. We may need another test - # with a different text. - character_traits = {'explicit_1': [], - 'explicit_2': [], - 'explicit_3': {'People': [169, - 267]}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - - # This text contains no quoted or direct speech. We need another test article - # to do proper regression on these features. 
- def test_vwp_quoted(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.AWE_Info(indicator='vwp_quoted'), vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.vwp_direct_speech_spans, []) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - governing_subjects = [None, None, None, None, 3, 3, None, None, 1, 1, None, None, None, 1, None, 1, 1, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 31, 31, 31, None, None, None, None, None, None, None, None, 39, None, None, None, 46, None, None, None, None, None, 53, 53, None, None, None, None, None, 59, 59, None, 66, None, None, None, None, None, None, None, None, 72, 72, None, None, None, None, 77, 77, None, None, None, 82, None, None, None, None, None, 88, None, None, 89, None, None, None, None, None, None, None, None, None, 100, 100, 100, 100, None, 106, None, None, None, None, None, None, None, None, None, None, None, None, 114, 114, 114, None, 114, 114, None, 114, 114, None, None, None, None, 133, None, 133, None, None, None, 139, None, None, None, None, None, None, None, None, 147, 147, None, None, None, None, 154, None, None, None, None, None, None, 159, None, None, None, None, None, 166, 166, None, 166, None, None, None, 181, 181, 181, None, None, None, None, None, 181, None, 197, None, None, None, None, None, None, 193, None, None, None, 197, None, None, 197, None, 197, None, None, None, None, None, None, None, None, None, None, None, 214, None, None, None, None, None, None, None, 220, None, None, 227, None, None, None, None, None, None, None, 227, 227, None, None, None, 235, None, None, None, None, None, None, None, 244, None, None, 247, 247, 247, None, None, None, None, None, 
None, None, None, 253, None, None, None, None, None, 263, 263, 263, None, None, None, None, None, 270, None, None, None, None, None, 277, 277, None, None, 288, None, None, None, 286, None, None, None, None, 286, None, None, None, None, None, None, 298, None, None, None, None, None, 301, 301, None, None, 319, None, None, None, None, None, None, None, 313, None, None, None, None, None, None, None, None, None, None, 337, None, None, None, None, 334, None, None, None, None, 337, None, 337, 337, 337, None, None, None, None, None, None, 337, None, 352, None, None, None, None, 352, None, None, None, None, 359, None, 360, 360, 360, None, 352, None, None, None, 369, None, None, None, None, None, None, None, None, None, None, None, 378, None, None, None, None, None, None, None, None, None, None, 400, None, None, None, None, None, None, None, 400, 400, None, None, 405, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 415, 415, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 431, None, None, None, None, None, 442, None, 450, None, None, None, None, None, None, 455, None, None, None, None, None, None, 460, None, None, None, None, None, None, 475, None, None, None, None, None, None, 475, 475, None, 481, 482, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 485, None, 499, 499, None, None, None, None, None, None, None, None, None, None, None, None, None, 511, None, None, None, None, None, None, None, 519, None, None, None, None, None, None, None, 530, None, None, None, None, None, None, 537, 537, None, 540, None, 540, 540, 540, None, 555, 555, None, 549, 549, None, None, None, None, None, 555, None, 555, None, None, None, None, None, None, None, 566, None, 570, None, None, None, None, None, None, None, 575, None, None, None, None, None, None, None, None, 595, 595, None, None, None, None, None, None, None, None, 595, None, 595, None, None, 
600, 600, 600, None, 609, None, None, None, None, None, 609, None, None, None, None, None, 615, 615, None, 615, None, None, None, None, 623, 623, None, None, None, None, 629, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 667, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 690, None, None, None, None, 695, 694, 699, None, None, None, None, None] - self.assertEqual(doc._.AWE_Info(indicator='governing_subject'), governing_subjects) - - doc = holmes_manager.get_document('GRE_Sample_Essay') - content_segments = [[18, 37], [37, 52], [52, 68], [68, 95], [95, 114], [114, 131], [242, 269], [309, 328], [328, 351], [351, 392], [411, 470], [470, 526], [527, 547], [547, 561], [561, 574], [574, 621]] - self.assertEqual(doc._.content_segments, content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_related = [[14, 3.6747192922762837, ['technology', 'emerge', 'market', 'Technology', 'nanotechnology'], [3, 46, 161, 212, 229, 247, 286, 307, 333, 334, 405, 537, 552, 589, 606, 623, 640, 695]], [51, 2.7333009865760953, ['thinking', 'assumption', 'argue', 'hypothetical', 'fact', 'consider', 'layman', 'actually', 'imagine'], [7, 154, 183, 196, 216, 284, 411, 459, 468, 538, 561, 594, 598]], [33, 2.6008002462296087, ['human', 'earth', 'humanity'], [11, 112, 147, 239, 277, 288, 408, 545, 579, 595, 628, 647, 701]]] - self.assertEqual(doc._.prompt_related, prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_language = ['technology', 'think', 'thinking', 'human', 'sure', 'surely', 'chance', 'chances', 'conceive', 'inconceivable', 'look', 'looking', 'argue', 'however', 'necessary', 'necessarily', 'create', 'creativity', 
'efficient', 'efficiency', 'fact', 'only', 'imagine', 'imagination', 'imaginations', 'contrast', 'consider', 'hope', 'yet', 'embrace'] - self.assertEqual(doc._.prompt_language, prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - core_sentences = [[0, 18], [131, 153], [153, 175], [212, 223], [283, 309], [393, 411], [671, 704]] - self.assertEqual(doc._.core_sentences, core_sentences) - - def test_extended_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - extended_core_sentences = [[175, 212], [223, 242], [269, 283], [622, 642], [642, 671]] - self.assertEqual(doc._.extended_core_sentences, extended_core_sentences) diff --git a/tests/old_tests/test_viewpoint_perspective_features2.py b/tests/old_tests/test_viewpoint_perspective_features2.py deleted file mode 100644 index 082411f..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features2.py +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="Surely many of us have expressed the following sentiment, or some variation on it, during our daily commutes to work: \"People are getting so stupid these days!\" Surrounded as we are by striding and strident automatons with cell phones glued to their ears, PDA's gripped in their palms, and omniscient, omnipresent CNN gleaming in their eyeballs, it's tempting to believe that technology has isolated and infantilized us, essentally 
transforming us into dependent, conformist morons best equipped to sideswip one another in our SUV's.\n\nFurthermore, hanging around with the younger, pre-commute generation, whom tech-savviness seems to have rendered lethal, is even less reassuring. With \"Teen People\" style trends shooting through the air from tiger-striped PDA to zebra-striped PDA, and with the latest starlet gossip zipping from juicy Blackberry to teeny, turbo-charged cell phone, technology seems to support young people's worst tendencies to follow the crowd. Indeed, they have seemingly evolved into intergalactic conformity police. After all, today's tech-aided teens are, courtesy of authentic, hands-on video games, literally trained to kill; courtesy of chat and instant text messaging, they have their own language; they even have tiny cameras to efficiently photodocument your fashion blunders! Is this adolescence, or paparazzi terrorist training camp?\n\nWith all this evidence, it's easy to believe that tech trends and the incorporation of technological wizardry into our everyday lives have served mostly to enforce conformity, promote dependence, heighten comsumerism and materialism, and generally create a culture that values self-absorption and personal entitlement over cooperation and collaboration. However, I argue that we are merely in the inchoate stages of learning to live with technology while still loving one another. After all, even given the examples provided earlier in this essay, it seems clear that technology hasn't impaired our thinking and problem-solving capacities. Certainly it has incapacitated our behavior and manners; certainly our values have taken a severe blow. However, we are inarguably more efficient in our badness these days. We're effective worker bees of ineffectiveness!\n\nIf Technology has so increased our senses of self-efficacy that we can become veritable agents of the awful, virtual CEO's of selfishness, certainly it can be beneficial. 
Harnessed correctly, technology can improve our ability to think and act for ourselves. The first challenge is to figure out how to provide technology users with some direly-needed direction.", label='GRE_Sample_Essay') - - -class ViewpointPerspectiveFeatureTest(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - perspective_spans = {'implicit': {'26': [23, 24, 25, 26, 27, 28, 29, 30, 31, 32], '52': [33, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65], '45': [45, 46, 47, 48], '58': [58], '68': [66, 67, 68, 69, 70, 71, 98], '75': [72, 73, 74, 75, 76, 77, 79, 80, 81, 83, 84, 85, 86, 87], '89': [88, 89, 90, 91, 92, 93, 94, 96, 97], '123': [99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 122, 123, 124, 125, 126, 127], '117': [113, 114, 115, 116, 117, 118, 119, 120, 121], '170': [128, 151, 152, 153, 154, 155, 168, 169, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180, 181, 182], '135': [129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150], '156': [156, 157, 158, 159, 160, 161, 162, 166, 167], '165': [163, 164, 165], '176': [176], '188': [183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193], '236': [194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 243, 244, 245], '246': [246, 247, 248, 249, 250, 251, 252, 253, 254, 255], '263': [256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 312], '281': [267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299], '301': [300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311], '351': [337, 338, 339, 340, 341, 342, 343, 349, 350, 351, 352, 365], '344': [344, 345, 346, 347, 348], 
'357': [353, 354, 355, 356, 357, 359, 360, 361, 362, 363, 364], '369': [366, 367, 368, 369, 371, 372, 373], '436': [405, 432, 433, 434, 435, 436, 437, 438], '410': [406, 407, 408, 409, 410, 412, 413, 414, 415, 416], '425': [425], '444': [439, 440, 441, 442, 443, 444, 446, 447, 448, 449, 450, 451, 453], '457': [454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 471, 472], '470': [468, 469, 470]}, 'explicit_1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 78, 82, 95, 277, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 358, 370, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 411, 417, 418, 419, 420, 421, 422, 423, 424, 426, 427, 428, 429, 430, 431, 445, 452], 'explicit_2': [242], 'explicit_3': {}} - self.assertEqual(doc._.vwp_perspective_spans, - perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - stance_markers = {'implicit': {'26': [28, 31], '58': [58], '68': [69, 71], '89': [88], '123': [100, 124, 126], '117': [117], '170': [170, 172], '176': [176], '188': [183, 187], '236': [194, 195, 205, 215, 220, 235, 240, 245], '246': [246, 255], '263': [264, 266], '281': [282, 296], '351': [337, 338, 340, 343, 351, 352], '357': [361], '369': [366], '410': [406, 409, 412], '425': [425], '436': [433, 435, 437], '444': [440, 443, 448], '457': [456, 459], '470': [468]}, 'explicit_1': [0, 1, 5, 40, 375, 377, 384, 388, 390, 399, 403, 404, 419, 421], 'explicit_2': [], 'explicit_3': {}} - self.assertEqual(doc._.vwp_stance_markers, - stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_egocentric, - 0.7061310782241015) - - def test_propn_allocentric(self): - 
doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_allocentric, 0.0) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - propositional_attitudes = {'implicit': [[[0, 32], 24, 26], [[33, 98], 67, 68], [[99, 127], None, 123], [[128, 182], 169, 170], [[183, 193], 185, 188], [[194, 245], 228, 229], [[337, 365], 350, 351], [[337, 365], 350, 351], [[366, 383], 377, 379], [[384, 396], 386, 387], [[405, 438], 434, 436], [[439, 453], 442, 444]], 'implicit_3': [[[454, 472], 456, 457]], 'explicit_1': [[[313, 336], 315, 316]], 'explicit_2': [], 'explicit_3': {}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotional_states(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - emotion_states = {'explicit_1': [333], 'explicit_2': [], 'explicit_3': {'Culture': [301]}} - self.assertEqual(doc._.vwp_emotion_states, - emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - # No character traits detected in this essay. We may need another test - # with a different text. 
- character_traits = {'explicit_1': [40, 390], 'explicit_2': [], 'explicit_3': {'People': [28], 'Pda': [58], 'Teens': [205, 220, 240]}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - def test_subjectivity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - subjectivity_ratings = [0.8888888888888888, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6666666666666666, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8333333333333334, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.95, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3833333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5714285714285714, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5714285714285714, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5714285714285714, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.subjectivity_ratings, - subjectivity_ratings) - - def test_mean_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_subjectivity, - 0.07682315064390537) - - def test_med_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_subjectivity, 0.0) - - def test_max_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_subjectivity, 1.0) - - def test_min_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_subjectivity, 0.0) - - def test_std_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_subjectivity, - 0.22539452272167562) - - def test_polarity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - polarity_ratings = [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.8, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.16666666666666666, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.43333333333333335, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, -0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.polarity_ratings, - polarity_ratings) - - def test_mean_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_polarity, - 0.0026535571346892103) - - def test_med_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_polarity, 0.0) - - def test_max_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_polarity, 1.0) - - def test_min_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_polarity, -1.0) - - def test_stdev_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_polarity, - 0.17544979638212832) - - def test_sentiment_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sentiment_ratings = [0.41000000000000003, 0, 0, 0, 0.21, 0.17, 0, 0.08, 0.30000000000000004, 0, 0, 0.06, -0.29, 0.08, 0, 0, 0, 0, 0.18, 0, 0, 0.01, 0, 0, 0.17, 0, 0, 0, -0.54, 0, 0, 0, 0, 0, -0.07, 0, 0, -0.1, 0, 0, -0.17, 0, 0, -0.22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21, 0, 0, 0, 0.30000000000000004, 0, 0, 0, 0, 0, 0, 0.26, 0, 0.51, 0, 0.13, 0, -0.16, 0, 0, 0, 0, 0, 0, 0, 0, 0.56, 0, 0.24, 0, 0.36, 0.16, 0, 0, 0.27, -0.07, 0, 0, -0.06, 0, 0, 0, 0, 0, 0, 0.15, 0, 0, -0.28, 0, 0, 0, -0.21, 0.27, 0, 0, 0.19, 0, 0, 0, 0, 0.21, 0, -0.63, 0, 0, 0.08, 0.1, 0.21, 0, 0, 0, 0.02, 0.17, 0, 0.45, 0, 0, 0.13, 0, 0.42, 0, 0.25, 0, 0.06, 0, 0, 0.36, 0, 0.06, 0, 0, 0, 0, 0, -0.42, 0.31, -0.37, 0, 0, 0.51, 0.4, 0, 0.15, 0, 0, 0, 0.19, -0.22, 0.27, 0, 0.13, 0, 0, 0.47000000000000003, 0.32, 0.17, 0, 0.25, 0, 0, -0.04, 0, -0.13, 0, 0, 
0, 0, 0.21, 0.15, 0, 0, 0.33, 0.24, -0.1, 0, 0, 0.21, 0, 0.15, 0, 0.19, 0, 0, 0.14, 0, 0, 0.44, 0, 0.47000000000000003, 0, 0, 0, 0.08, 0.41000000000000003, 0, 0, 0, 0, 0, -0.79, 0, 0.44, 0, 0.18, 0, 0.21, 0.37, 0, 0, 0, 0.21, 0, 0, 0.34, 0, 0, 0.08, 0.21, 0.02, 0, 0, 0.48, 0, 0, 0.06, 0, 0, 0, 0, 0.25, 0, 0, 0, -0.66, 0.04, 0.5, 0, 0, 0, 0.21, 0, -0.07, 0, 0, 0, 0.61, 0, 0.51, 0, 0.19, 0, 0, 0, 0.09, 0, 0.07, 0.38, 0, 0, 0.11, 0, 0.21, 0, 0, 0, 0.08, 0.24, 0, 0.38, -0.35000000000000003, 0, 0.34, 0, 0, -0.37, 0, 0, -0.42, 0.71, 0.04, 0.32, 0, 0, 0.46, 0, 0.21, 0, 0.27, 0, 0.17, 0.4, 0, 0.28, 0, 0.08, 0, -0.19, -0.46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.35000000000000003, 0, 0.73, 0, 0.13, 0, -0.02, 0.36, 0.27, -0.07, 0, 0, 0.21, 0, 0.08, 0.17, 0, 0, 0, 0, 0, 0, -0.02, 0, 0, 0, 0.28, 0, -0.13, 0, 0, 0.21, 0, -0.15, 0, 0.37, 0, 0, 0, 0, 0.15, 0, 0, -0.34, 0, 0.07, 0, 0, 0, 0.15, 0, 0, 0.21, -0.12, 0.04, -0.44, 0.27, 0, 0.08, 0, 0, 0, 0, 0, 0.48, 0, 0, -0.36, 0, 0, 0, 0, 0, 0.48, 0.23, 0, 0, 0.48, 0, 0, 0, 0.13, 0, 0, 0, 0, 0, 0, 0.46, 0, 0.12, 0, 0, 0.35000000000000003, 0.33, 0.03, 0, 0, 0, -0.68, 0, 0.27, 0, 0, 0, -0.51, 0, 0.15, 0, 0.35000000000000003, 0.29, 0.4, 0, 0, -0.38, 0, 0.13, 0.35000000000000003, 0.28, 0, 0.5, 0, 0.42, 0, 0.16, 0, 0, 0, 0, 0.58, 0.23, 0, 0, 0.02, -0.30000000000000004, 0, 0, 0.41000000000000003, 0.13, 0, 0, 0.06, 0, 0, 0.12, 0.11, 0] - print(doc._.sentiment_ratings) - self.assertEqual(doc._.sentiment_ratings, - sentiment_ratings) - - def test_mean_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sentiment, - 0.0925943396226415) - - def test_med_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sentiment, 0.025) - - def test_max_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sentiment, 0.73) - - def test_min_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.min_sentiment, -0.79) - - def test_stdev_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_sentiment, - 0.2517283677948165) - - def test_tone_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - tone_ratings = [0.5, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.29, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, -0.1, 0.0, 0.0, -0.17, 0.0, 0.0, -0.22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13, 0.0, 0.0, 0.0, 0.0, 0.0, -0.16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.8, 1.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, -0.06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.28, 0.0, 0.0, 0.0, -0.21, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.63, 0.0, 0.0, 0.04, -0.16666666666666666, 0.105, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.42, 0.0, -0.37, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, -1.0, 0.0, 0.0, -0.04, 0.0, -0.13, 0.0, 0.0, 0.0, 0.0, 0.0, 0.075, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.105, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.47000000000000003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.79, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.66, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.61, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, -0.35000000000000003, 0.0, 0.0, 0.0, 0.0, -0.37, 0.0, 0.0, -0.42, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, -0.19, -0.46, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, -0.02, 0.36, 0.0, -0.07, 0.0, 0.0, 0.105, 0.0, 0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.02, 0.0, 0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.0, -0.21, 0.0, 0.0, 0.0, -0.37, 0.0, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, -0.34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, -0.12, 0.0, -0.44, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.24, 0.0, 0.0, -0.36, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.24, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, 0.0, 0.015, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.51, 0.0, 0.21428571428571427, 0.0, 0.17500000000000002, 0.0, 0.2, 0.0, 0.0, -0.38, 0.0, 0.0, 0.17500000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - print(doc._.tone_ratings) - self.assertEqual(doc._.tone_ratings, - tone_ratings) - - def test_mean_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_tone, - -0.036631034060279344) - - def test_med_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_tone, 0.0) - - def test_max_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_tone, 1.0) - - def test_min_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_tone, -1.0) - - def test_stdev_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_tone, - 0.2166434664720677) - - def test_vwp_arguments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_arguments = [0, 1, 2, 3, 4, 5, 25, 26, 27, 28, 31, 69, 70, 71, 72, 100, 113, 117, 118, 119, 120, 124, 125, 126, 128, 151, 170, 171, 172, 176, 177, 178, 179, 183, 186, 187, 188, 189, 194, 
195, 215, 229, 235, 236, 245, 250, 257, 258, 259, 260, 264, 265, 266, 267, 280, 281, 282, 283, 313, 315, 316, 317, 318, 319, 337, 338, 340, 341, 342, 343, 344, 345, 351, 352, 353, 356, 359, 366, 368, 369, 375, 376, 377, 378, 379, 384, 386, 388, 389, 390, 406, 408, 409, 410, 411, 412, 413, 417, 418, 419, 421, 425, 433, 435, 436, 437, 440, 443, 444, 445, 446, 447, 448, 454, 456, 457, 458, 459, 461, 462, 463, 466, 467, 468] - print(doc._.vwp_arguments) - self.assertEqual(doc._.vwp_arguments, - vwp_arguments) - - def test_propn_argument_words(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_argument_words, - 0.26215644820295986) - - def test_vwp_interactives(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_interactives = [3, 17, 27, 28, 29, 35, 51, 67, 68, 78, 82, 95, 97, 124, 175, 176, 198, 235, 242, 247, 258, 259, 262, 263, 277, 282, 315, 318, 332, 340, 347, 355, 356, 358, 370, 376, 386, 389, 392, 394, 397, 398, 409, 411, 418, 425, 429, 445, 452] - self.assertEqual(doc._.vwp_interactives, - vwp_interactives) - - def test_propn_interactive(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_interactive, - 0.10359408033826638) - - # This text contains no quoted or direct speech. - # We need another test article - # to do proper regression on these features. 
- def test_vwp_quoted(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.vwp_quoted, - vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.vwp_direct_speech_spans, - [[[3, 17], [], [[0, 32]]]]) - - def test_propn_direct_speech(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_direct_speech, 0.0) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
governing_subjects = [3, None, None, None, None, 3, None, None, None, None, None, None, None, None, None, None, 3, None, 17, 17, None, 3, None, None, None, None, 24, 28, 24, None, None, None, None, 50, None, None, 35, 35, 35, None, 35, 35, 35, None, None, None, None, None, 47, None, None, None, 50, 50, None, 54, None, None, 50, None, 50, 50, None, None, None, 64, None, None, 67, 67, None, 67, None, None, None, 73, None, 73, None, None, 73, 73, None, 82, None, None, None, None, None, None, None, None, None, None, None, None, 95, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 116, None, None, 116, 113, None, None, 126, 126, None, None, None, None, None, None, None, None, None, 134, 134, None, None, None, None, None, 143, None, 134, None, None, 148, None, None, None, None, None, None, None, None, None, None, 159, None, None, None, None, None, None, 167, None, None, None, None, 169, None, 169, None, None, None, 174, 174, None, 174, None, None, None, 185, None, None, None, 185, 185, 185, 185, None, 185, None, 202, 202, None, None, None, None, None, 197, 197, 202, None, 202, 202, 213, None, None, None, None, None, None, None, 202, 202, None, 202, None, 202, 202, None, None, None, None, None, None, None, 228, None, 230, 230, None, None, 234, 234, 238, None, None, 234, 234, None, None, 242, None, 248, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 262, 262, None, 262, None, None, None, None, None, None, None, None, None, None, None, 277, 277, None, 269, 269, None, 269, None, None, 269, None, None, 269, None, None, None, None, None, 269, 269, None, None, None, 299, None, None, None, None, None, None, None, None, None, None, None, 315, None, None, 315, None, None, 318, 318, 318, None, 318, 318, 318, 318, None, 318, 318, None, None, 318, 318, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 350, None, 
None, None, None, None, 354, None, None, None, None, None, None, 358, None, 367, None, None, 367, None, 370, None, 370, None, 377, None, 376, None, 377, None, 382, None, None, 386, None, None, 386, 390, 390, 386, 386, None, 392, None, None, None, None, 397, 397, None, 397, 397, 397, None, None, None, None, None, 407, 407, None, 411, 411, None, None, 411, None, None, None, 418, 418, 418, 418, None, 428, None, 428, None, None, None, None, None, 434, None, None, 434, 434, None, 442, 442, None, None, None, 442, None, 445, None, 445, None, 445, 445, None, None, None, None, None, 456, None, 456, None, None, None, 456, None, None, 465, None, None, None, None, None, None] - self.assertEqual(doc._.governing_subjects, - governing_subjects) - - def test_content_segments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - content_segments = [[0, 33], [99, 128], [194, 255], [337, 384], [384, 397], [397, 405]] - self.assertEqual(doc._.content_segments, - content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_related = [[33, 2.9208434151399927, ['technology', 'tech', 'savviness', 'technological', 'wizardry', 'Technology'], [73, 114, 116, 169, 199, 268, 274, 275, 330, 354, 407, 442, 464]], [50, 1.9179541822056472, ['conformist', 'conformity', 'materialism', 'entitlement', 'selfishness'], [86, 191, 285, 293, 307, 431]], [41, 1.4803625377643503, ['surely', 'furthermore', 'generally', 'merely', 'certainly'], [0, 100, 296, 320, 366, 375, 433]]] - self.assertEqual(doc._.prompt_related, - prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_language = ['so', 'stupid', 'tempt', 'tempting', 'technology', 'conform', 'conformist', 'furthermore', 'assure', 'reassuring', 'support', 'after', 'all', 'easy', 'general', 'generally', 'however', 'argue', 'still', 'clear', 'certain', 'certainly', 'value', 'values', 'efficient', 'self', 'selfishness', 
'beneficial', 'challenge'] - self.assertEqual(doc._.prompt_language, - prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - core_sentences = [[33, 99], [128, 183], [256, 313], [313, 337], [405, 439], [454, 473]] - self.assertEqual(doc._.core_sentences, - core_sentences) - - def test_extended_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - extended_core_sentences = [[183, 194], [439, 454]] - self.assertEqual(doc._.extended_core_sentences, - extended_core_sentences) diff --git a/tests/old_tests/test_viewpoint_perspective_features3.py b/tests/old_tests/test_viewpoint_perspective_features3.py deleted file mode 100644 index 08c1d54..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features3.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="In all actuality, I think it is more probable that our bodies will surely deteriorate long before our minds do in any significant amount. Who can't say that technology has made us lazier, but that's the key word, lazy, not stupid. The ever increasing amount of technology that we incorporate into our daily lives makes people think and learn every day, possibly more than ever before. Our abilities to think, learn, philosophize, etc. may even reach limits never dreamed of before by average people. 
Using technology to solve problems will continue to help us realize our potential as a human race.\n\nIf you think about it, using technology to solve more complicating problems gives humans a chance to expand their thinking and learning, opening up whole new worlds for many people. Many of these people are glad for the chance to expand their horizons by learning more, going to new places, and trying new things. If it wasn't for the invention of new technological devices, I wouldn't be sitting at this computer trying to philosophize about technology. It would be extremely hard for children in much poorer countries to learn and think for themselves with out the invention of the internet. Think what an impact the printing press, a technologically superior mackine at the time, had on the ability of the human race to learn and think.\n\nRight now we are seeing a golden age of technology, using it all the time during our every day lives. When we get up there's instant coffee and the microwave and all these great things that help us get ready for our day. But we aren't allowing our minds to deteriorate by using them, we are only making things easier for ourselves and saving time for other important things in our days. Going off to school or work in our cars instead of a horse and buggy. Think of the brain power and genius that was used to come up with that single invention that has changed the way we move across this globe.\n\nUsing technology to solve our continually more complicated problems as a human race is definately a good thing. Our ability to think for ourselves isn't deteriorating, it's continuing to grow, moving on to higher though functions and more ingenious ideas. 
The ability to use what technology we have is an example", label='GRE_Sample_Essay') - - -class ViewpointPerspectiveFeatureTest(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - perspective_spans = {'implicit': {'29': [26, 27, 28, 29, 36, 37], '33': [30, 31, 32, 33, 35], '39': [38, 39, 40, 42, 43, 44, 45, 46, 47, 48], '41': [41], '62': [49, 50, 51, 52, 53, 54, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75], '105': [99, 100, 101, 102, 103, 104, 105, 106, 107, 109, 111, 112, 113, 114, 115, 116], '206': [204, 205, 206, 207, 208, 209, 210, 211, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228], '213': [212, 213], '229': [229, 257], '245': [230, 231, 232, 233, 234, 235, 236, 237, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256], '239': [238, 239], '286': [285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 296, 300, 301, 302, 304, 305], '295': [295], '298': [297, 298], '332': [332], '354': [354, 355, 356, 357, 358, 359, 360, 381], '363': [361, 362, 363, 364, 365, 366, 367, 368, 369, 370], '373': [371, 372, 373, 374, 375], '396': [382, 383, 384, 385, 386, 391, 392, 393, 394, 395, 396, 397, 398, 400, 401], '399': [399], '437': [429, 430, 431, 432, 437, 438, 439]}, 'explicit_1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 34, 55, 56, 57, 58, 59, 60, 61, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 108, 110, 159, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 299, 303, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 333, 334, 335, 336, 337, 345, 376, 
377, 378, 379, 380, 387, 388, 389, 390, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 433, 434, 435, 436], 'explicit_2': [119], 'explicit_3': {132: [117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150], 154: [151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177], 341: [338, 339, 340, 341, 342, 343, 344, 346, 347, 348, 349, 350, 351, 352, 353]}} - - self.assertEqual(doc._.vwp_perspective_spans, - perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - stance_markers = {'implicit': {'29': [27, 29, 37], '33': [35], '41': [41], '39': [47], '62': [64, 70], '105': [103], '206': [205, 208, 216, 218], '213': [213], '229': [229], '239': [239], '245': [254], '295': [295], '286': [301], '332': [332], '354': [354, 358], '396': [391], '399': [399]}, 'explicit_1': [0, 1, 2, 9, 14, 23, 87, 88, 91, 178, 191, 270, 306, 321, 324, 345, 390], 'explicit_2': [], 'explicit_3': {132: [118, 124, 130, 134, 148], 154: [151, 159]}} - self.assertEqual(doc._.vwp_stance_markers, - stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_egocentric, - 0.46136363636363636) - - def test_propn_allocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_allocentric, 0.175) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - propositional_attitudes = {'implicit': [[[0, 25], 6, 7], [[26, 48], 26, 29], [[178, 203], 190, 194], [[204, 228], 204, 206], [[204, 228], 204, 206], [[229, 257], None, 229], [[306, 337], 307, 310]], 'implicit_3': [[[76, 98], 77, 81], [[354, 381], None, 354], [[429, 
439], 430, 437]], 'explicit_1': [[[0, 25], 4, 5]], 'explicit_2': [], 'explicit_3': {154: [[[151, 177], 154, 156]], 383: [[[382, 401], 383, 396]]}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotional_states(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - emotion_states = {'explicit_1': [], 'explicit_2': [], 'explicit_3': {'People': [156]}} - self.assertEqual(doc._.vwp_emotion_states, - emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - # No character traits detected in this essay. We may need another test - # with a different text. - character_traits = {'explicit_1': [35], 'explicit_2': [], 'explicit_3': {}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - def test_subjectivity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - subjectivity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8888888888888888, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.39999999999999997, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.45454545454545453, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.5416666666666666, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5357142857142857, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6666666666666666, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.6000000000000001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.subjectivity_ratings, - subjectivity_ratings) - - def test_mean_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_subjectivity, - 0.11994355317884729) - - def test_med_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_subjectivity, 0.0) - - def test_max_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_subjectivity, 1.0) - - def test_min_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_subjectivity, 0.0) - - def 
test_std_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_subjectivity, - 0.28616783063337886) - - def test_polarity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - polarity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, -0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, -0.7999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.15, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.13636363636363635, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, -0.2916666666666667, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2857142857142857, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07142857142857142, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.polarity_ratings, - polarity_ratings) - - def test_mean_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_polarity, - 0.022459256429844664) - - def test_med_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_polarity, 0.0) - - def test_max_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_polarity, 0.8) - - def test_min_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_polarity, - -0.7999999999999999) - - def test_stdev_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_polarity, - 0.15416665096742294) - - def test_sentiment_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sentiment_ratings = [0, 0.21, 0.30000000000000004, 0, -0.19, 0.42, 0, 0, 0, -0.34, 0, 0, 0, 0.08, 0.41000000000000003, -0.34, -0.05, 0, 0, 0, 0.1, 0, 0, 0.4, 0.1, 0, 0, 0, 0, -0.22, 0, 0.13, 0, 0.12, 0, 0, 0, 0, 0, 0, 0, -0.30000000000000004, -0.19, 0, 0.48, 0, 0.38, 0.54, 0, 0, 0, 0, 0.1, 0, 0.13, 0, 0, 0.09, 0, 0, 0.18, 0, 0, 0.17, 0.42, 0, 0.46, 0.1, 0.34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.42, 0, 0.46, 0, 0.4, 0, 0, 0, 0.13, 0.08, 0.19, 0, 0.39, 0.28, 0, 0, 0.1, 0.02, -0.17, 0, 0, 0.13, 0, 
0.32, 0, 0.08, 0.15, 0, 0.48, 0, 0.36, 0, 0.46, -0.07, 0.04, 0.36, 0.11, 0, 0, 0, 0, 0.42, 0.11, 0, 0, 0, 0.13, 0, 0.32, 0, 0, 0, 0, 0, 0.04, 0.26, 0, 0.08, 0, 0.15, 0, 0.35000000000000003, 0, 0.23, 0.34, 0.21, 0.67, 0, 0, 0, 0.17, 0, 0, 0, 0, 0.17, 0, 0.63, 0, 0, 0.26, 0, 0.08, 0, 0, -0.1, 0.35000000000000003, 0, 0, -0.01, 0, 0.67, 0, 0, 0, -0.38, 0.67, 0, 0, 0, 0, 0, 0, 0, 0, -0.35000000000000003, 0, -0.67, -0.07, 0, 0, 0.19, 0, 0, -0.29, 0.16, 0, 0, -0.46, 0.38, 0, -0.4, -0.11, -0.13, 0, 0, 0, 0.29, 0.30000000000000004, -0.16, 0, 0, 0, 0.19, 0, 0, 0, 0.46, 0, 0.42, 0, 0, 0, -0.30000000000000004, 0, 0.35000000000000003, 0, 0, 0.42, 0, 0.42, 0, -0.07, 0.19, 0, 0.28, 0.09, 0, 0.04, 0, 0.34, 0, 0, 0, 0.15, 0, 0, 0.08, 0, 0.5, 0, 0, 0.36, 0.11, 0, 0.46, 0, 0.42, 0, 0, 0.58, 0.06, 0, 0, -0.02, 0.04, 0.6900000000000001, 0.19, 0, 0.13, 0, 0, 0, 0.21, 0, 0.15, 0, 0, 0.1, 0.34, 0, 0, 0, 0, 0.27, 0.34, 0, 0, 0.21, 0.5, 0, 0, 0.06, 0, 0.21, 0, 0.62, 0, 0, 0.48, 0, 0.27, 0.41000000000000003, 0, 0, 0.34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.34, 0.1, 0, 0, 0, 0, 0, 0.07, 0, 0, 0, 0, 0, 0, 0.25, 0.15, 0, 0.1, 0.45, 0, 0, 0, 0, 0, -0.01, -0.04, 0, 0.1, 0, 0.01, 0, 0, 0, -0.08, 0, 0.04, 0.26, 0, -0.08, 0, 0.42, 0, 0, 0.30000000000000004, 0.24, 0, 0.63, 0, 0, 0.13, 0, 0.16, 0.34, 0, 0, 0.09, 0.35000000000000003, 0, 0, 0.21, 0, 0.22, 0, -0.01, 0, 0, 0.28, 0, 0, 0, 0.13, 0, 0.32, 0, 0.12, 0, -0.29, 0, -0.07, 0.04, 0.36, 0.11, 0, 0, 0.04, 0.72, 0.13, 0, 0, -0.5, 0, -0.42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.32, 0, 0.15, 0.08, 0, 0.1, 0, 0, 0, 0, 0.15, 0, 0, 0, 0.5, 0, 0.04, 0, 0.13, 0, 0.21, 0, -0.07, -0.02] - self.assertEqual(doc._.sentiment_ratings, - sentiment_ratings) - - def test_mean_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sentiment, 0.159) - - def test_med_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sentiment, 0.13) - - def test_max_sentiment(self): - doc = 
holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sentiment, 0.72) - - def test_min_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sentiment, -0.67) - - def test_stdev_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_sentiment, - 0.2514541142984802) - - def test_tone_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - tone_ratings = [0.0, 0.105, 0.15000000000000002, 0.0, -0.19, 0.0, 0.0, 0.0, 0, -0.34, 0.0, 0.0, 0.0, 0.0, 0.5, -0.34, -0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15000000000000002, 0.0, 0.0, -0.48, 0.0, -0.38, -0.7999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.065, 0.04, 0.0, 0.0, -0.39, -0.28, 0.0, 0.0, -0.1, -0.15, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.13636363636363635, 0.0, 0.0, 0.25, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.13, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0, 0.0, -0.01, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, -0.38, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, -0.16, 0.0, 0.0, 0.0, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, -0.2916666666666667, 0.0, 0.0, 0.0, 0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2857142857142857, 0.0, 
0.0, 0.0, -0.02, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.41000000000000003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.34, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.035, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.45, 0.0, 0.0, 0.0, 0.0, 0.0, -0.01, -0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.08, 0.0, 0.0, 0.0, 0.0, -0.08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07142857142857142, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, -0.5, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.72, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0, 0.15, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, -0.02] - self.assertEqual(doc._.tone_ratings, - tone_ratings) - - def test_mean_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_tone, - 0.009606315253374078) - - def test_med_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_tone, 0.0) - - def test_max_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_tone, 0.8) - - def test_min_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_tone, - -0.7999999999999999) - - def test_stdev_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_tone, - 0.17268539447948075) - - def test_vwp_arguments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_arguments = [0, 1, 2, 4, 5, 7, 8, 9, 10, 13, 14, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 37, 39, 66, 70, 71, 72, 76, 77, 78, 79, 81, 83, 87, 88, 90, 91, 93, 95, 99, 101, 102, 103, 104, 
106, 107, 118, 120, 124, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136, 138, 157, 158, 159, 160, 161, 164, 165, 166, 178, 190, 191, 192, 198, 199, 200, 201, 229, 230, 245, 246, 247, 248, 249, 253, 254, 306, 319, 320, 321, 322, 324, 325, 342, 347, 348, 354, 355, 356, 358, 361, 362, 363, 364, 383, 385, 386, 387, 389, 390, 391, 392, 399, 430, 431, 432, 438, 439] - self.assertEqual(doc._.vwp_arguments, - vwp_arguments) - - def test_propn_argument_words(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_argument_words, 0.275) - - def test_vwp_interactives(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_interactives = [4, 8, 11, 18, 27, 28, 34, 38, 39, 47, 56, 59, 71, 76, 88, 108, 110, 119, 128, 145, 153, 156, 166, 170, 175, 176, 181, 186, 190, 191, 192, 196, 207, 261, 272, 276, 282, 285, 286, 293, 294, 296, 299, 303, 307, 308, 309, 311, 319, 323, 326, 333, 335, 345, 368, 376, 379, 387, 389, 399, 400, 402, 407, 408, 409, 412, 413, 421, 425, 435] - self.assertEqual(doc._.vwp_interactives, - vwp_interactives) - - def test_propn_interactive(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_interactive, - 0.1590909090909091) - - # This text contains no quoted or direct speech. - # We need another test article - # to do proper regression on these features. 
- def test_vwp_quoted(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.vwp_quoted, vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.vwp_direct_speech_spans, []) - - def test_propn_direct_speech(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_direct_speech, 0.0) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - governing_subjects = [4, None, 4, None, None, 4, None, 6, 9, 6, None, None, 11, None, 12, 12, 19, None, None, 18, 19, 19, None, 19, 
19, None, None, None, None, 26, None, None, None, 31, None, 34, None, None, None, 38, None, 38, 38, None, 38, None, None, None, None, None, None, None, None, None, None, None, None, 56, 55, None, 59, 59, 54, None, 63, None, 63, None, None, None, 63, 63, 63, 63, 63, None, None, 76, None, 76, None, 77, None, 77, None, 77, None, None, 86, 86, None, None, None, None, None, None, None, None, None, None, None, None, 100, None, None, 99, None, 99, None, 108, None, 110, 110, None, 110, 110, None, None, None, None, 119, 119, None, None, None, None, None, 125, None, None, None, 124, None, None, None, None, None, None, 137, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 154, 154, 154, None, 154, None, None, None, 162, 163, 163, 163, None, 154, 154, None, None, None, None, 154, None, None, None, None, None, 179, None, 179, None, None, None, None, None, None, None, None, None, None, None, 190, 190, None, None, 190, None, 190, 190, None, None, None, None, 204, 208, 204, None, None, None, 213, None, None, None, 210, None, 210, 210, None, 210, 210, None, None, None, None, None, None, None, None, None, None, None, None, 232, None, None, 239, None, None, None, None, None, None, 235, 235, None, 235, 235, None, 235, 235, None, None, None, None, None, None, 261, 261, None, None, 261, None, None, None, None, None, None, None, None, None, None, None, 271, None, None, 276, 270, None, None, None, 282, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 296, None, None, None, None, None, 303, None, None, None, None, None, 307, None, 311, None, 312, 312, 312, None, None, None, None, 319, 319, None, 323, 323, None, None, 319, None, 329, 329, 329, 329, 329, None, 335, None, None, None, None, None, None, None, None, None, 345, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 361, None, 361, None, 361, None, None, None, None, None, 370, None, None, None, 376, 376, None, 
None, None, None, None, None, None, 384, None, 390, 390, 387, 387, 391, None, 391, 391, 383, 383, None, 383, 383, None, None, 402, None, 402, 402, None, None, None, 403, None, None, None, 412, None, 412, None, 412, None, 423, 423, None, None, None, 426, None, None, None, None, None, None, None, None, None, None, 435, 430, None, 430] - self.assertEqual(doc._.governing_subjects, - governing_subjects) - - def test_content_segments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - content_segments = [[26, 49], [281, 306], [306, 338], [338, 354], [354, 382]] - self.assertEqual(doc._.content_segments, - content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_related = [[16, 2.0255215718047395, ['technology', 'technological', 'device'], [31, 54, 100, 125, 187, 188, 202, 268, 384, 434]], [14, 1.7660044150110379, ['solve', 'problem', 'complicate', 'complicated'], [102, 103, 127, 129, 130, 386, 390, 391]], [18, 1.6330696606277109, ['body', 'mind', 'human', 'brain'], [12, 19, 114, 132, 251, 312, 357, 394]]] - self.assertEqual(doc._.prompt_related, - prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_language = ['think', 'mind', 'minds', 'technology', 'solve', 'human', 'complicate', 'complicating', 'chance', 'hard', 'instead', 'example'] - self.assertEqual(doc._.prompt_language, - prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - core_sentences = [[0, 26], [49, 76], [99, 117], [117, 151], [229, 258], [382, 402], [429, 440]] - self.assertEqual(doc._.core_sentences, - core_sentences) - - def test_extended_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - extended_core_sentences = [[76, 99], [151, 178], [178, 204], [204, 229], [258, 281], [402, 429]] - self.assertEqual(doc._.extended_core_sentences, - extended_core_sentences) - - -doc = 
holmes_manager.get_document('GRE_Sample_Essay') diff --git a/tests/old_tests/test_viewpoint_perspective_features4.py b/tests/old_tests/test_viewpoint_perspective_features4.py deleted file mode 100644 index ef93cf4..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features4.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="There is no current proof that advancing technology will deteriorate the ability of humans to think. On the contrary, advancements in technology had advanced our vast knowledge in many fields, opening opportunities for further understanding and achievement. For example, the problem of dibilitating illnesses and diseases such as alzheimer's disease is slowing being solved by the technological advancements in stem cell research. The future ability of growing new brain cells and the possibility to reverse the onset of alzheimer's is now becoming a reality. This shows our initiative as humans to better our health demonstrates greater ability of humans to think.\n\nOne aspect where the ability of humans may initially be seen as an example of deteriorating minds is the use of internet and cell phones. In the past humans had to seek out information in many different enviroments and aspects of life. Now humans can sit in a chair and type anything into a computer and get an answer. 
Our reliance on this type of technology can be detrimental if not regulated and regularily substituted for other information sources such as human interactions and hands on learning. I think if humans understand that we should not have such a reliance on computer technology, that we as a species will advance further by utilizing the opportunity of computer technology as well as the other sources of information outside of a computer. Supplementing our knowledge with internet access is surely a way for technology to solve problems while continually advancing the humaThere is no current proof that advancing technology will deteriorate the ability of humans to think. On the contrary, advancements in technology had advanced our vast knowledge in many fields, opening opportunities for further understanding and achievement. For example, the problem of dibilitating illnesses and diseases such as alzheimer's disease is slowing being solved by the technological advancements in stem cell research. The future ability of growing new brain cells and the possibility to reverse the onset of alzheimer's is now becoming a reality. This shows our initiative as humans to better our health demonstrates greater ability of humans to think.\n\nOne aspect where the ability of humans may initially be seen as an example of deteriorating minds is the use of internet and cell phones. In the past humans had to seek out information in many different enviroments and aspects of life. Now humans can sit in a chair and type anything into a computer and get an answer. Our reliance on this type of technology can be detrimental if not regulated and regularily substituted for other information sources such as human interactions and hands on learning. I think if humans understand that we should not have such a reliance on computer technology, that we as a species will advance further by utilizing the opportunity of computer technology as well as the other sources of information outside of a computer. 
Supplementing our knowledge with internet access is surely a way for technology to solve problems while continually advancing the human race.", label='GRE_Sample_Essay') - - -class ViewpointPerspectiveFeatureTest(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - perspective_spans = {'implicit': {'1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], '25': [17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], '30': [30], '57': [41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68], '51': [51], '89': [69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92], '94': [93, 94, 96, 97, 98, 99, 100, 110], '104': [104], '129': [111, 112, 113, 129, 130, 131, 132, 133, 134, 135, 136, 137], '122': [114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128], '142': [138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 149, 150, 151, 152, 153, 154, 155], '148': [148], '254': [248, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 283], '268': [268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], '292': [284, 285, 286, 287, 288, 289, 290, 291, 292, 294, 295, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], '297': [297], '324': [308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335], '318': [318], '356': [336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359], '361': [360, 361, 363, 364, 365, 366, 367, 377], '371': [371], '396': [378, 379, 380, 396, 397, 398, 399, 400, 401, 402, 403, 404], '389': [381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395], '409': [405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 416, 417, 418, 419, 420, 421, 
422], '415': [415], '521': [515, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536]}, 'explicit_1': [26, 95, 101, 102, 103, 105, 106, 107, 108, 109, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 221, 247, 249, 293, 362, 368, 369, 370, 372, 373, 374, 375, 376, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 488, 514, 516], 'explicit_2': [], 'explicit_3': {157: [156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173], 206: [205, 206, 207, 208, 219, 220, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246], 424: [423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440], 473: [472, 473, 474, 475, 486, 487, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513]}} - - self.assertEqual(doc._.vwp_perspective_spans, - perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - stance_markers = {'implicit': {'1': [4, 15], '25': [17, 20, 28, 34], '30': [30], '57': [41, 45, 52], '89': [79], '94': [94, 96], '104': [104], '122': [119, 122, 123, 124, 125, 126], '148': [148], '254': [250, 255, 262], '268': [271, 282], '292': [284, 287, 295, 301], '297': [297], '324': [308, 312, 319], '356': [346], '361': [361, 363], '371': [371], '389': [386, 389, 390, 391, 392, 393], '415': [415], '521': [517, 522, 529]}, 'explicit_1': [103, 109, 181, 183, 184, 210, 221, 370, 376, 448, 450, 451, 477, 488], 'explicit_2': [], 'explicit_3': {157: [158], 206: [205, 236], 424: [425], 
473: [472, 503]}} - self.assertEqual(doc._.vwp_stance_markers, - stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_egocentric, - 0.515828677839851) - - def test_propn_allocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_allocentric, - 0.186219739292365) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - propositional_attitudes = {'implicit': [[[0, 16], None, 1], [[0, 16], None, 4], [[156, 173], 157, 159], [[174, 202], 175, 182], [[248, 283], 248, 254], [[268, 282], None, 271], [[423, 440], 424, 426], [[441, 469], 442, 449], [[515, 536], 515, 521]], 'implicit_3': [[[17, 40], 21, 25], [[41, 68], 48, 57], [[69, 92], 76, 89], [[284, 307], 288, 292], [[308, 335], 315, 324], [[336, 359], 343, 356]], 'explicit_1': [[[203, 247], 203, 204], [[470, 514], 470, 471]], 'explicit_2': [], 'explicit_3': {206: [[[205, 218], 206, 207]], 473: [[[472, 485], 473, 474]]}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotional_states(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - emotion_states = {'explicit_1': [], 'explicit_2': [], 'explicit_3': {}} - self.assertEqual(doc._.vwp_emotion_states, - emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - # No character traits detected in this essay. - # We may need another test with a different text. 
- character_traits = {'explicit_1': [], 'explicit_2': [], 'explicit_3': {}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - def test_subjectivity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - subjectivity_ratings = [0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 1.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.125, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8500000000000001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.5, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8888888888888888, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 1.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.125, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8500000000000001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.5, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8888888888888888, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0] - self.assertEqual(doc._.subjectivity_ratings, - subjectivity_ratings) - - def test_mean_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_subjectivity, - 0.045381197870824425) - - def test_med_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_subjectivity, 0.0) - - def test_max_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_subjectivity, 1.0) - - def test_min_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_subjectivity, 0.0) - - def test_std_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_subjectivity, - 0.1638519638285082) - - def test_polarity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - polarity_ratings = [0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.45, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.45, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.polarity_ratings, - polarity_ratings) - - def test_mean_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_polarity, - 0.014824594492644285) - - def test_med_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_polarity, 0.0) - - def test_max_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_polarity, 0.5) - - def test_min_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_polarity, -0.25) - - def test_stdev_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_polarity, - 0.08965735351867955) - - def test_sentiment_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sentiment_ratings = [0, 0, 0.08, -0.13, -0.34, 0, 0, 0.13, 0.08, -0.34, 0, 0.5, 0, 0, 0, 0.42, 0, 0.08, 0, -0.4, 0, 0, 0, 0.13, 0, 0.28, 0, 0.17, 0.5700000000000001, 0, 0, 0, 0, 0.23, 0, 0, 0.19, 0.53, 0, 0.5700000000000001, 0, 0, -0.02, 0, 0, -0.37, 0, 0, 0, 0, 0, -0.07, -0.07, 0, 0, -0.8300000000000001, 0, 0, 0, 0, -0.1, 0, 0.07, 0, 0, 0.02, -0.22, 0.37, 0, 0, 0.42, 0.5, 0, 0, 0.67, 
0.30000000000000004, 0, 0, 0, 0.38, 0, -0.34, 0, -0.16, 0, 0, 0, 0, 0.06, 0.28, 0.04, 0.31, 0, 0, 0, 0, 0.42, -0.07, 0, 0, 0.44, 0, 0.46, 0, 0.62, 0.5, 0, 0, 0, 0.42, 0, 0, 0.27, -0.02, 0, 0, 0.5, 0, 0, 0.13, 0, 0.29, 0, -0.07, -0.07, -0.02, 0, 0, 0, 0, 0, 0.04, 0, 0.42, 0, -0.22, 0, 0, 0, 0, 0.02, 0, 0, 0, 0.31, -0.30000000000000004, 0.33, 0, 0, 0.22, 0, 0, 0, 0, 0.42, 0, 0.06, 0, 0.35000000000000003, 0.2, 0, 0.04, 0.22, 0, 0.26, 0, 0, 0.04, 0.46, 0, 0.27, -0.07, 0.27, 0, 0, 0.5700000000000001, 0.08, 0, 0.26, 0, 0.13, 0.35000000000000003, 0.29, -0.42, 0, 0.38, 0, 0, 0, 0, 0, -0.1, -0.33, 0, 0.07, 0.07, -0.36, 0, 0, 0, -0.08, -0.35000000000000003, 0, -0.19, 0.42, 0, 0, 0.42, 0, 0, 0, 0.38, -0.21, 0.07, -0.04, -0.5700000000000001, -0.08, -0.46, -0.13, 0, 0, 0, -0.07, 0.04, 0.06, 0.08, 0.27, 0.19, -0.1, 0, 0, 0.52, 0, 0.46, 0.13, -0.07, 0.65, -0.07, 0, 0.1, 0, 0, 0.33, 0.05, 0, 0.04, 0.46, 0, 0, 0, 0.5700000000000001, 0, 0.42, 0.42, 0, 0.41000000000000003, 0.04, 0.22, 0, 0.13, 0, 0.32, 0, 0, 0.12, 0, 0, 0, 0, 0.08, -0.13, -0.34, 0, 0, 0.13, 0.08, -0.34, 0, 0.5, 0, 0, 0, 0.42, 0, 0.08, 0, -0.4, 0, 0, 0, 0.13, 0, 0.28, 0, 0.17, 0.5700000000000001, 0, 0, 0, 0, 0.23, 0, 0, 0.19, 0.53, 0, 0.5700000000000001, 0, 0, -0.02, 0, 0, -0.37, 0, 0, 0, 0, 0, -0.07, -0.07, 0, 0, -0.8300000000000001, 0, 0, 0, 0, -0.1, 0, 0.07, 0, 0, 0.02, -0.22, 0.37, 0, 0, 0.42, 0.5, 0, 0, 0.67, 0.30000000000000004, 0, 0, 0, 0.38, 0, -0.34, 0, -0.16, 0, 0, 0, 0, 0.06, 0.28, 0.04, 0.31, 0, 0, 0, 0, 0.42, -0.07, 0, 0, 0.44, 0, 0.46, 0, 0.62, 0.5, 0, 0, 0, 0.42, 0, 0, 0.27, -0.02, 0, 0, 0.5, 0, 0, 0.13, 0, 0.29, 0, -0.07, -0.07, -0.02, 0, 0, 0, 0, 0, 0.04, 0, 0.42, 0, -0.22, 0, 0, 0, 0, 0.02, 0, 0, 0, 0.31, -0.30000000000000004, 0.33, 0, 0, 0.22, 0, 0, 0, 0, 0.42, 0, 0.06, 0, 0.35000000000000003, 0.2, 0, 0.04, 0.22, 0, 0.26, 0, 0, 0.04, 0.46, 0, 0.27, -0.07, 0.27, 0, 0, 0.5700000000000001, 0.08, 0, 0.26, 0, 0.13, 0.35000000000000003, 0.29, -0.42, 0, 0.38, 0, 0, 0, 0, 0, -0.1, -0.33, 0, 0.07, 0.07, 
-0.36, 0, 0, 0, -0.08, -0.35000000000000003, 0, -0.19, 0.42, 0, 0, 0.42, 0, 0, 0, 0.38, -0.21, 0.07, -0.04, -0.5700000000000001, -0.08, -0.46, -0.13, 0, 0, 0, -0.07, 0.04, 0.06, 0.08, 0.27, 0.19, -0.1, 0, 0, 0.52, 0, 0.46, 0.13, -0.07, 0.65, -0.07, 0, 0.1, 0, 0, 0.33, 0.05, 0, 0.04, 0.46, 0, 0, 0, 0.5700000000000001, 0, 0.42, 0.42, 0, 0.41000000000000003, 0.04, 0.22, 0, 0.13, 0, 0.32, 0, 0, 0.12, 0, 0, 0.36, 0.11, 0] - self.assertEqual(doc._.sentiment_ratings, - sentiment_ratings) - - def test_mean_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sentiment, 0.11572614107883818) - - def test_med_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sentiment, 0.0) - - def test_max_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sentiment, 0.67) - - def test_min_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sentiment, - -0.8300000000000001) - - def test_stdev_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_sentiment, - 0.2783455430853229) - - def test_tone_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - tone_ratings = [0.0, 0.0, -0.08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.02, 0.0, 0.0, -0.37, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, -0.07, 0.0, 0.0, -0.8300000000000001, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.19, 0.0, -0.34, 0.0, -0.16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21, -0.07, 0.0, 0.0, 0.44, 0.0, 0.0, 0.0, 0.62, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.065, 0.0, 
0.0, 0.0, -0.07, -0.07, -0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.22, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, 0.0, -0.42, 0.0, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, -0.07, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.38, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.325, -0.07, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.02, 0.0, 0.0, -0.37, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, -0.07, 0.0, 0.0, -0.8300000000000001, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.19, 0.0, -0.34, 0.0, -0.16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21, -0.07, 0.0, 0.0, 0.44, 0.0, 0.0, 0.0, 0.62, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.065, 0.0, 0.0, 0.0, -0.07, -0.07, -0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.22, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, 0.0, -0.42, 0.0, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, -0.07, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, -0.38, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.325, -0.07, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.tone_ratings, - tone_ratings) - - def test_mean_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_tone, -0.01023764617125613) - - def test_med_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_tone, 0.0) - - def test_max_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_tone, 0.62) - - def test_min_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_tone, - -0.8300000000000001) - - def test_stdev_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_tone, - 0.14640313944514674) - - def test_vwp_arguments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_arguments = [0, 1, 2, 4, 5, 6, 11, 14, 15, 17, 18, 19, 20, 24, 25, 26, 28, 29, 41, 42, 44, 45, 46, 51, 52, 56, 57, 58, 59, 60, 71, 78, 79, 80, 94, 95, 96, 97, 103, 104, 105, 106, 108, 109, 114, 116, 119, 122, 123, 124, 125, 126, 174, 175, 181, 183, 184, 185, 193, 194, 195, 197, 200, 201, 203, 204, 205, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 220, 221, 222, 225, 226, 227, 228, 231, 236, 237, 250, 254, 255, 261, 262, 263, 269, 271, 272, 273, 278, 281, 282, 284, 285, 286, 287, 291, 292, 293, 295, 296, 308, 309, 311, 312, 313, 318, 319, 323, 324, 325, 326, 327, 338, 345, 346, 347, 361, 362, 363, 364, 370, 371, 372, 373, 375, 376, 381, 383, 386, 389, 390, 391, 392, 393, 441, 442, 448, 450, 451, 452, 460, 461, 462, 464, 467, 468, 470, 471, 472, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 487, 488, 489, 492, 
493, 494, 495, 498, 503, 504, 517, 521, 522, 528, 529, 530] - self.assertEqual(doc._.vwp_arguments, - vwp_arguments) - - def test_propn_argument_words(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_argument_words, - 0.3426443202979516) - - def test_vwp_interactives(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_interactives = [0, 26, 51, 54, 74, 86, 93, 95, 101, 140, 165, 174, 177, 194, 203, 209, 213, 221, 249, 293, 318, 321, 341, 353, 360, 362, 368, 407, 432, 441, 444, 461, 470, 476, 480, 488, 516] - self.assertEqual(doc._.vwp_interactives, - vwp_interactives) - - def test_propn_interactive(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_interactive, - 0.06890130353817504) - - # This text contains no quoted or direct speech. We need another test article - # to do proper regression on these features. - def test_vwp_quoted(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.vwp_quoted, vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.vwp_direct_speech_spans, []) - - def test_propn_direct_speech(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_direct_speech, 0.0) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - governing_subjects = [None, None, None, None, None, None, None, None, None, 7, None, None, None, None, None, None, None, 28, None, None, None, None, None, None, None, 21, None, 26, 26, 26, 31, None, None, 21, None, None, None, None, None, None, None, 48, None, None, None, None, None, None, None, None, None, None, None, None, None, 53, None, 48, None, 48, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 76, 76, None, 76, None, None, 93, None, 95, None, None, None, 98, None, 101, 102, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 116, None, None, None, None, None, None, 113, None, 113, 113, None, None, None, None, None, 141, None, 141, None, 141, None, 141, None, None, 146, None, None, None, None, None, None, None, None, 
157, None, None, 157, 157, None, None, None, 157, None, 165, None, None, None, 157, None, None, None, None, 174, 174, None, None, None, None, None, 175, 175, None, None, 175, None, 175, 175, 175, None, None, None, None, None, None, None, None, None, None, None, None, None, 203, None, None, 206, None, None, None, None, 209, None, None, None, None, None, None, None, None, None, None, None, None, None, 221, 221, 221, 221, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 249, 250, None, None, 248, None, None, None, None, None, None, 259, None, None, 259, 259, None, None, None, None, None, None, None, None, None, None, 274, None, None, None, None, None, None, None, 295, None, None, None, None, None, None, None, 288, None, 293, 293, 293, 298, None, None, 288, None, None, None, None, None, None, None, 315, None, None, None, None, None, None, None, None, None, None, None, None, None, 320, None, 315, None, 315, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 343, 343, None, 343, None, None, 360, None, 362, None, None, None, 365, None, 368, 369, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 383, None, None, None, None, None, None, 380, None, 380, 380, None, None, None, None, None, 408, None, 408, None, 408, None, 408, None, None, 413, None, None, None, None, None, None, None, None, 424, None, None, 424, 424, None, None, None, 424, None, 432, None, None, None, 424, None, None, None, None, 441, 441, None, None, None, None, None, 442, 442, None, None, 442, None, 442, 442, 442, None, None, None, None, None, None, None, None, None, None, None, None, None, 470, None, None, 473, None, None, None, None, 476, None, None, None, None, None, None, None, None, None, None, None, None, None, 488, 488, 488, 488, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 516, 517, None, None, 515, 515, None, 515, None, None, None, 526, None, None, 526, 526, None, None, None, None] - self.assertEqual(doc._.governing_subjects, - governing_subjects) - - def test_content_segments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - content_segments = [[41, 111], [138, 174], [308, 378], [405, 441]] - self.assertEqual(doc._.content_segments, - content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_related = [[23, 4.43795914259837, ['human', 'brain', 'mind'], [13, 75, 98, 107, 118, 128, 141, 157, 196, 206, 280, 342, 365, 374, 385, 395, 408, 424, 463, 473, 534]], [14, 3.966699314397649, ['technology', 'advancement', 'technological'], [7, 21, 23, 62, 63, 180, 218, 234, 259, 274, 288, 290, 329, 330, 447, 485, 501, 526]], [2, 3.8626609442060085, ['dibilitating', 'enviroment', 'detrimental'], [47, 150, 183, 314, 417, 450]]] - self.assertEqual(doc._.prompt_related, - prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_language = ['proof', 'technology', 'human', 'humans', 'contrary', 'knowledge', 'problem', 'brain', 'show', 'shows', 'initiate', 'initiative', 'demonstrate', 'demonstrates', 'initial', 'initially', 'mind', 'minds', 'detriment', 'detrimental', 'think', 'further', 'far', 'sure', 'surely'] - self.assertEqual(doc._.prompt_language, - prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - core_sentences = [[0, 17], [111, 138], [174, 203], [203, 248], [248, 284], [378, 405], [441, 470], [470, 515], [515, 537]] - self.assertEqual(doc._.core_sentences, - core_sentences) - - def test_extended_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - extended_core_sentences = [[17, 41], [284, 308]] - 
self.assertEqual(doc._.extended_core_sentences, - extended_core_sentences) diff --git a/tests/old_tests/test_viewpoint_perspective_features5.py b/tests/old_tests/test_viewpoint_perspective_features5.py deleted file mode 100644 index 49c25b2..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features5.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# GRE Sample from https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/sample_responses -holmes_manager.parse_and_register_document( - document_text="In recent centuries, humans have developed the technology very rapidly, and you may accept some merit of it, and you may see a distortion in society occured by it. To be lazy for human in some meaning is one of the fashion issues in thesedays. There are many symptoms and resons of it. However, I can not agree with the statement that the technology make humans to be reluctant to thinkng thoroughly.\n\nOf course, you can see the phenomena of human laziness along with developed technology in some place. However, they would happen in specific condition, not general. What makes human to be laze of thinking is not merely technology, but the the tendency of human that they treat them as a magic stick and a black box. Not understanding the aims and theory of them couses the disapproval problems.\n\nThe most important thing to use the thechnology, regardless the new or old, is to comprehend the fundamental idea of them, and to adapt suit tech to tasks in need. 
Even if you recognize a method as a all-mighty and it is extremely over-spec to your needs, you can not see the result you want. In this procedure, humans have to consider as long as possible to acquire adequate functions. Therefore, humans can not escape from using their brain.\n\nIn addition, the technology as it is do not vain automatically, the is created by humans. Thus, the more developed tech and the more you want a convenient life, the more you think and emmit your creativity to breakthrough some banal method sarcastically.\n\nConsequently, if you are not passive to the new tech, but offensive to it, you would not lose your ability to think deeply. Furthermore, you may improve the ability by adopting it.", label='GRE_Sample_Essay') - - -class ViewpointPerspectiveFeatureTest(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - perspective_spans = {'implicit': {'6': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], '29': [29, 30, 31], '34': [33, 34, 35, 36, 37, 38, 39, 40], '41': [41, 42, 43, 44, 45, 46, 47, 48, 49], '51': [50, 51, 53, 54, 55, 56, 57, 58], '52': [52], '104': [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111], '113': [112, 113, 114, 115, 116, 117, 118, 119], '120': [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143], '152': [144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156], '173': [157, 158, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191], '160': [159, 160], '245': [240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250], '262': [251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263], '267': [264, 265, 266, 267, 268, 269, 270]}, 'explicit_1': [59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 79], 'explicit_2': [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 32, 
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 280, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339], 'explicit_3': {72: [72, 73, 74, 75, 76, 77, 78], 227: [223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239], 276: [271, 272, 273, 274, 275, 276, 277, 278, 279, 281, 282, 283, 284]}} - self.assertEqual(doc._.vwp_perspective_spans, - perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - stance_markers = {'implicit': {'41': [46], '52': [52], '104': [100, 103], '152': [147, 149, 155], '160': [160], '173': [178], '245': [240, 243, 247], '262': [252]}, 'explicit_1': [62], 'explicit_2': [14, 15, 23, 81, 82, 84, 85, 192, 193, 194, 211, 214, 215, 220, 288, 293, 299, 302, 304, 308, 314, 320, 326, 327, 329, 332], 'explicit_3': {72: [75, 78], 227: [234, 237], 276: [283]}} - self.assertEqual(doc._.vwp_stance_markers, - stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_egocentric, - 0.36764705882352944) - - def test_propn_allocentric(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_allocentric, - 0.11176470588235295) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - propositional_attitudes = {'implicit': [[[59, 79], 61, 64], [[80, 99], 84, 86], [[100, 111], 102, 104], [[112, 143], 113, 120], [[144, 156], 145, 152], [[192, 222], 214, 217], [[240, 250], 242, 245], [[301, 328], 319, 322], [[329, 
339], 331, 333]], 'implicit_3': [[[33, 49], 34, 42], [[59, 79], None, 67]], 'explicit_1': [], 'explicit_2': [[[157, 191], None, 173], [[271, 300], 288, 289]], 'explicit_3': {}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotional_states(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - emotion_states = {'explicit_1': [], 'explicit_2': [15, 212, 221], 'explicit_3': {'Humans': [75], 'Human and Humans': [133], 'Tech': [190, 281]}} - self.assertEqual(doc._.vwp_emotion_states, - emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - # No character traits detected in this essay. - # We may need another test with a different text. - character_traits = {'explicit_1': [], 'explicit_2': [293, 299, 308], 'explicit_3': {}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - def test_subjectivity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - subjectivity_ratings = [0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.125, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.43333333333333335, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 1.0, 0.0, 0.0, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.3, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.subjectivity_ratings, - subjectivity_ratings) - - def test_mean_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_subjectivity, - 0.0971136989732031) - - def test_med_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_subjectivity, 0.0) - - def test_max_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_subjectivity, 1.0) - - def test_min_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_subjectivity, 0.0) - - def test_std_subjectivity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_subjectivity, - 0.23942774650308649) - - def test_polarity_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - polarity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, -0.16666666666666666, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.05, 0.0, 0.0, 0.0, 0.0, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.polarity_ratings, - polarity_ratings) - - def test_mean_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_polarity, - 0.007970197846230905) - - def test_med_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_polarity, 0.0) - - def test_max_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - 
self.assertEqual(doc._.max_polarity, 0.5) - - def test_min_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_polarity, -0.5) - - def test_stdev_polarity(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_polarity, - 0.10006321173117026) - - def test_sentiment_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - sentiment_ratings = [0, 0.1, 0, 0, 0, 0.21, 0.26, 0, 0.13, 0.19, 0, 0, 0, 0, 0.13, 0.34, 0.06, 0.48, 0, 0, 0, 0, 0, 0.13, 0.31, 0.04, -0.37, 0, 0.06, 0, -0.1, 0, 0, 0, 0.29, -0.48, 0, 0.36, 0, 0.06, 0.36, 0, 0.27, 0, 0, 0.06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.08, 0, 0.19, -0.35000000000000003, 0.38, -0.54, 0, 0, -0.21, 0, 0, 0.13, 0.27, 0, 0, 0.29, -0.31, 0, 0, 0.27, 0, 0, 0, -0.06, 0, 0, 0.35000000000000003, 0.31, 0, 0, 0, 0.36, -0.27, 0, 0, 0.26, 0.13, 0, 0.06, 0.21, 0, -0.08, 0, 0, 0, -0.12, 0, -0.12, 0.17, 0, 0.38, -0.08, 0, 0, 0, 0.36, 0, 0.29, 0, 0, 0.15, 0, 0.38, 0, -0.13, 0, 0, 0, 0, 0.03, 0, -0.36, 0, 0, 0.46, 0, -0.07, 0.04, 0.54, 0.06, 0, 0.04, 0.1, 0.08, 0, 0.38, -0.53, 0, 0, 0, -0.16, 0, 0, 0, 0, -0.4, 0, 0, 0, 0, 0, 0.45, 0.13, 0, 0.04, 0, 0, 0, -0.28, 0, 0.67, 0, -0.45, 0, 0, 0, 0.39, 0, -0.01, 0.51, 0, 0, 0, 0, 0, 0.21, 0.22, 0.19, 0, 0, 0, 0.11, 0, 0.08, 0, 0, 0.28, 0.04, 0.05, -0.07, 0.04, 0.21, 0, 0.36, 0, 0, 0, 0.30000000000000004, 0.17, 0, -0.12, 0, 0, 0.15, 0, 0, -0.35000000000000003, 0.38, -0.31, 0, -0.27, 0, 0.25, 0, 0, 0, -0.18, 0, 0, 0.21, 0, 0.39, -0.07, -0.05, -0.07, 0.53, 0, 0.12, -0.07, 0, 0, 0, 0, 0, -0.35000000000000003, 0.38, -0.12, 0, 0, 0, -0.30000000000000004, 0, 0, 0, 0, 0, 0, -0.13, -0.07, 0, 0, -0.1, 0.38, 0.44, -0.26, 0, 0, 0, 0, -0.1, 0, 0, 0, 0, 0, 0, 0.26, 0.19, 0, 0, 0, 0, 0.25, 0.04, 0.53, 0.42, 0, 0, 0, 0, 0.42, 0, 0, 0, 0.68, 0, 0.33, 0.06, -0.28, 0.05, -0.19, 0, 0, 0, 0, 0, 0, 0, 0.38, 0.04, 0, 0, -0.67, -0.19, 0, 0, 0.30000000000000004, 0, 0, 0, 0, 0, 0.38, 0.35000000000000003, 0, -0.5, 0, 
-0.42, 0, 0, 0, 0, 0, 0.13, 0.28, 0, 0.5, -0.1, 0, 0, 0] - self.assertEqual(doc._.sentiment_ratings, - sentiment_ratings) - - def test_mean_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_sentiment, 0.059669421487603305) - - def test_med_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_sentiment, 0.0) - - def test_max_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_sentiment, 0.68) - - def test_min_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_sentiment, -0.67) - - def test_stdev_sentiment(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_sentiment, - 0.2683496906535001) - - def test_tone_ratings(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - tone_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.19, 0.0, 0.0, 0.0, 0.0, 0.065, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.065, 0.0, 0.0, -0.37, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, -0.48, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.17500000000000002, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.31, 0.0, 0.0, 0.135, 0.0, 0.0, 0.0, -0.06, 0.0, 0.0, 0.17500000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, -0.27, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.17, 0.0, -0.38, 0.05000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.38, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, -0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.5, 0.0, 0.0, 0.0, -0.16666666666666666, 0.0, 0.0, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.4, 0.0, 0.0, 0.0, 0.0, 0, 0.45, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.28, 0.0, 0.13636363636363635, 0.0, -0.45, 0.0, 0.0, 0.0, 0.0, 0.0, -0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.36, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, -0.12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.18, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, -0.05, -0.07, 0.265, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17500000000000002, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, -0.38, -0.44, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.1, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.265, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.34, 0.0, 0.0, 0.0, -0.3, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.38, -0.04, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.0, 0.0, -0.38, -0.35000000000000003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.065, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0] - self.assertEqual(doc._.tone_ratings, - tone_ratings) - - def test_mean_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.mean_tone, - -0.02251189581768094) - - def test_med_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.median_tone, 0.0) - - def test_max_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.max_tone, 0.5) - - def test_min_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.min_tone, -0.5) - - def test_stdev_tone(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.stdev_tone, - 0.15157085308452187) - - def test_vwp_arguments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_arguments = [5, 6, 13, 14, 15, 16, 22, 23, 24, 59, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73, 74, 81, 82, 84, 85, 86, 92, 93, 94, 100, 103, 104, 125, 144, 145, 146, 147, 149, 150, 152, 153, 155, 175, 176, 178, 179, 192, 193, 194, 195, 196, 197, 198, 205, 214, 215, 216, 217, 218, 
219, 220, 221, 228, 229, 230, 240, 243, 244, 246, 247, 252, 253, 271, 280, 281, 287, 288, 289, 302, 304, 306, 314, 319, 320, 321, 322, 323, 324, 325, 326, 327, 329, 331, 332, 333, 334, 335, 336] - self.assertEqual(doc._.vwp_arguments, - vwp_arguments) - - def test_propn_argument_words(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_argument_words, - 0.28823529411764703) - - def test_vwp_interactives(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_interactives = [9, 13, 22, 50, 61, 78, 84, 161, 169, 192, 194, 202, 206, 211, 214, 220, 224, 234, 274, 279, 280, 287, 288, 292, 305, 311, 319, 323, 331] - self.assertEqual(doc._.vwp_interactives, - vwp_interactives) - - def test_propn_interactive(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_interactive, - 0.08529411764705883) - - # This text contains no quoted or direct speech. We need - # another test article to do proper regression on these features. 
- def test_vwp_quoted(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.vwp_quoted, - vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.vwp_direct_speech_spans, - [[[288], [280], [[271, 300]]]]) - - def test_propn_direct_speech(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - self.assertEqual(doc._.propn_direct_speech, - 0.08529411764705883) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - governing_subjects = [8, 8, 8, None, None, None, 4, None, None, 4, 4, None, None, None, None, 13, None, None, None, None, None, None, None, None, 22, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 34, 34, 34, None, None, None, 34, 34, None, None, None, None, None, None, None, None, None, None, 61, None, None, None, None, 61, 61, 
None, None, None, None, None, 70, None, None, 72, 72, None, 72, 72, None, None, 84, 84, None, None, None, 84, None, None, None, None, None, 88, 88, None, None, 88, None, None, None, 102, None, None, None, 102, 102, 102, 102, None, None, 102, None, None, 112, None, None, 114, 114, 114, None, 113, None, 113, 113, None, None, None, None, None, None, None, None, None, 132, None, 134, None, 134, 134, None, None, 142, None, None, None, None, None, None, None, None, None, None, 145, None, None, None, None, None, None, 160, None, None, None, None, None, None, None, 169, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 186, None, 186, 186, None, 194, None, None, 194, None, None, None, None, 202, None, None, None, None, 204, 204, None, None, 204, 204, None, 211, None, None, None, None, 214, None, None, None, 220, None, 227, None, None, None, None, 227, None, 227, 227, 227, 227, 227, None, 227, None, None, None, 242, None, None, None, None, 242, 242, 242, None, 248, None, None, 256, None, None, None, None, None, None, 258, None, None, 256, 256, None, None, None, 265, None, None, None, 276, None, None, 275, None, None, None, None, None, None, 276, None, None, None, None, None, 288, None, 288, None, 288, None, 292, None, 288, None, None, None, 288, None, None, 319, None, None, None, 305, None, 305, 305, None, 305, 305, None, None, 305, 305, None, None, None, None, None, 319, None, 323, None, 323, 323, None, 331, None, None, None, 331, None, None, 335, 335, None, None] - self.assertEqual(doc._.governing_subjects, - governing_subjects) - - def test_content_segments(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - content_segments = [[33, 59], [100, 192], [192, 223], [271, 300]] - self.assertEqual(doc._.content_segments, - content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_related = [[23, 5.423728813559322, ['thesedays', 'reson', 
'thinkng', 'emmit'], [48, 55, 77, 291]], [11, 2.024701356549909, ['human', 'brain'], [4, 37, 72, 90, 114, 130, 227, 242, 249, 269]], [9, 1.619433198380567, ['technology', 'tech'], [8, 70, 95, 123, 186, 256, 276, 312]]] - self.assertEqual(doc._.prompt_related, - prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - prompt_language = ['human', 'humans', 'technology', 'however', 'agree', 'of', 'course', 'problem', 'problems', 'regard', 'regardless', 'even', 'result', 'therefore', 'add', 'addition', 'thus', 'consequent', 'consequently', 'furthermore'] - self.assertEqual(doc._.prompt_language, - prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - core_sentences = [[0, 33], [59, 80], [80, 100], [251, 271]] - self.assertEqual(doc._.core_sentences, - core_sentences) - - def test_extended_core_sentences(self): - doc = holmes_manager.get_document('GRE_Sample_Essay') - extended_core_sentences = [[223, 240], [240, 251], [301, 329], [329, 340]] - self.assertEqual(doc._.extended_core_sentences, - extended_core_sentences) diff --git a/tests/old_tests/test_viewpoint_perspective_features6.py b/tests/old_tests/test_viewpoint_perspective_features6.py deleted file mode 100644 index e7e90c2..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features6.py +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -# Aesop's fable is a public domain document available at http://read.gov/aesop/007.html -document_text = "A lion lay asleep in the forest, his great head resting on his paws. 
A timid little mouse came upon him unexpectedly, and in her fright and haste to get away, ran across the lion's nose. Roused from his nap, the lion laid his huge paw angrily on the tiny creature to kill her.\n\n\"Spare me!\" begged the poor mouse. \"Please let me go and some day I will surely repay you.\"\n\nThe lion was much amused to think that a mouse could ever help him. But he was generous and finally let the mouse go.\n\nSome days later, while stalking his prey in the forest, the lion was caught in the toils of a hunter's net. Unable to free himself, he filled the forest with his angry roaring. The mouse knew the voice and quickly found the lion struggling in the net. Running to one of the great ropes that bound him, she gnawed it until it parted, and soon the lion was free.\n\n\"You laughed when I said I would repay you,\" said the Mouse. \"Now you see that even a Mouse can help a Lion.\"" - -holmes_manager.parse_and_register_document(document_text, label='Aesop') - - -class ViewpointPerspectiveFeatureTest2(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('Aesop') - perspective_spans = {'implicit': {'2': [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 15], '9': [9], '11': [11, 12, 13, 14], '68': [62, 63, 64, 66, 67, 68, 69, 71, 72], '70': [70], '75': [73, 74, 75, 77, 78, 87], '106': [104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114], '131': [115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140], '147': [141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154], '182': [170, 171, 172, 173, 174, 176, 180, 181, 182, 183, 187, 188], '175': [175], '178': [177, 178, 179], '186': [184, 185, 186], '192': [189, 190, 191, 192, 193, 194]}, 'explicit_1': [65, 76, 79, 80, 81, 82, 83, 84, 85, 86, 200, 202], 'explicit_2': [197, 205, 214], 'explicit_3': {19: [1, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 
40, 41], 48: [19, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], 90: [88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103], 156: [90, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169], 197: [195, 196, 198], 200: [199, 201, 203, 204], 210: [206, 207, 208, 209, 210, 211], 214: [210, 212, 213, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225]}} - self.assertEqual(doc._.vwp_perspective_spans, - perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('Aesop') - stance_markers = {'implicit': {'9': [9], '68': [66], '70': [70], '106': [104, 107, 109], '131': [120], '175': [175], '186': [184], '192': [193]}, 'explicit_1': [83, 200, 202], 'explicit_2': [], 'explicit_3': {19: [17], 90: [99], 200: [203], 214: [217, 220]}} - self.assertEqual(doc._.vwp_stance_markers, - stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.propn_egocentric, - 0.1504424778761062) - - def test_propn_allocentric(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.propn_allocentric, - 0.3672566371681416) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('Aesop') - propositional_attitudes = {'implicit': [[[42, 61], 48, 49], [[104, 114], 105, 106], [[212, 225], 214, 215]], 'implicit_3': [[[0, 15], 1, 2]], 'explicit_1': [], 'explicit_2': [], 'explicit_3': {90: [[[88, 103], 90, 95]], 156: [[[155, 169], 156, 162]], 210: [[[195, 211], 210, 208]]}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotion_states(self): - doc = holmes_manager.get_document('Aesop') - emotion_states = {'explicit_1': [], 'explicit_2': [], 'explicit_3': {'Mouse': [28], 'Lion': [53, 93, 152, 165]}} - self.assertEqual(doc._.vwp_emotion_states, - emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('Aesop') - 
character_traits = {'explicit_1': [], 'explicit_2': [], 'explicit_3': {'Mouse': [17], 'Lion': [107]}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - def test_subjectivity_ratings(self): - doc = holmes_manager.get_document('Aesop') - subjectivity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 1.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8888888888888888, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.8, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.subjectivity_ratings, - subjectivity_ratings) - - def test_mean_subjectivity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.mean_subjectivity, - 0.1446998722860792) - - def test_med_subjectivity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.median_subjectivity, 0.0) - - def test_max_subjectivity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.max_subjectivity, 1.0) - - def test_min_subjectivity(self): - doc = holmes_manager.get_document('Aesop') - 
self.assertEqual(doc._.min_subjectivity, 0.0) - - def test_std_subjectivity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.stdev_subjectivity, - 0.31240836502706953) - - def test_polarity_ratings(self): - doc = holmes_manager.get_document('Aesop') - polarity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1875, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4000000000000001, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.4, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.polarity_ratings, - polarity_ratings) - - def test_mean_polarity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.mean_polarity, 0.03156130268199234) - - def test_med_polarity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.median_polarity, 0.0) - - def test_max_polarity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.max_polarity, 0.8) - - def test_min_polarity(self): - doc = holmes_manager.get_document('Aesop') - 
self.assertEqual(doc._.min_polarity, -0.5) - - def test_stdev_polarity(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.stdev_polarity, - 0.2057206641937544) - - def test_sentiment_ratings(self): - doc = holmes_manager.get_document('Aesop') - sentiment_ratings = [0.04, 0.21, 0.29, 0.37, 0, 0, 0.42, 0, 0, 0.62, 0.21, 0, 0.08, 0, 0, 0, 0.04, -0.4, 0.22, -0.05, 0, 0, 0, 0.33, 0, 0, 0, 0, -0.43, 0, -0.22, 0, 0.27, -0.01, 0, 0, 0, 0, 0.21, 0, 0.12, 0, 0, 0, 0, 0.38, 0, 0, 0.21, 0, 0, 0.18, 0.1, -0.07, 0.08, 0, 0.02, 0.26, 0, -0.79, 0, 0, 0, 0, 0.14, 0, 0, 0, 0, 0, -0.33, -0.05, 0, 0, 0.25, 0.23, 0, 0.33, 0, 0.06, 0.34, -0.19, 0.08, 0.41000000000000003, 0.04, 0, 0, 0, 0, 0, 0.21, 0, 0.19, 0.51, 0, 0.42, 0, 0.04, -0.05, 0, 0, 0.48, 0, 0, 0, 0, 0, 0.6000000000000001, 0, 0, 0.23, 0, -0.05, 0.33, 0, 0, 0.06, 0, 0, 0, 0, 0, 0, -0.31, 0, 0, 0.42, 0, 0, 0.21, 0, 0, 0, 0, 0, 0, 0.04, 0.25, 0, 7.000000000000001e-05, 0, -0.51, 0, 0.81, 0, 0, 0, 0.12, 0, 0.42, 0, 0, -0.61, 0.21, 0, 0, -0.05, 0, 0, 0.37, 0, 0, 0, 0, 0.21, -0.5, 0, 0, 7.000000000000001e-05, 0, 0.38, 0, 0.27, 0, 0, 0.62, 0, 0, -0.23, 0, 0, 0, 0, 0, 0, 0, 0.12, 0, 0, -0.07, 0, 0.21, 0, 0.81, 0, 0, 0, 0, 0, 0, -0.19, 0, -0.19, 0, 0.04, 0, 0, 0, 0, 0, -0.05, 0, 0, 0.06, 0, 0.31, 0, 0.08, 0.04, -0.05, 0.35000000000000003, 0.48, 0.04, 0.21, 0, 0] - self.assertEqual(doc._.sentiment_ratings, - sentiment_ratings) - - def test_mean_sentiment(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.mean_sentiment, 0.10138091954022989) - - def test_med_sentiment(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.median_sentiment, 0.02) - - def test_max_sentiment(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.max_sentiment, 0.81) - - def test_min_sentiment(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.min_sentiment, -0.79) - - def test_stdev_sentiment(self): - doc = 
holmes_manager.get_document('Aesop') - self.assertEqual(doc._.stdev_sentiment, 0.285942060038646) - - def test_tone_ratings(self): - doc = holmes_manager.get_document('Aesop') - tone_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.4, -0.1875, -0.05, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, -0.43, 0.0, -0.22, 0.0, 0.0, -0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, -0.79, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.4, -0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.19, 0.0, 0.0, 0.0, 0.0, 0.0, -0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.31, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.51, 0.0, 0.4, 0.0, 0.0, 0.0, 0.12, 0.0, 0.0, 0.0, 0.0, -0.61, 0.0, 0.0, 0.0, -0.05, 0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, -0.23, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.81, 0.0, 0.0, 0.0, 0.0, 0, 0.0, -0.19, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04, 0.0, -0.05, 0.17500000000000002, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.tone_ratings, tone_ratings) - - def test_mean_tone(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.mean_tone, -0.020775862068965516) - - def test_med_tone(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.median_tone, 0.0) - - def test_max_tone(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.max_tone, 0.81) - - def test_min_tone(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.min_tone, -0.79) - - def test_stdev_tone(self): - doc = holmes_manager.get_document('Aesop') - 
self.assertEqual(doc._.stdev_tone, - 0.2382495242512233) - - def test_vwp_arguments(self): - doc = holmes_manager.get_document('Aesop') - vwp_arguments = [64, 66, 68, 70, 95, 96, 99, 104, 109, 110, 120, 157, 162, 165, 201, 202, 203, 205, 208, 214, 215, 216, 217, 220, 221] - self.assertEqual(doc._.vwp_arguments, - vwp_arguments) - - def test_propn_argument_words(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.propn_argument_words, - 0.11061946902654868) - - def test_vwp_interactives(self): - doc = holmes_manager.get_document('Aesop') - vwp_interactives = [39, 51, 65, 74, 76, 81, 85, 138, 197, 200, 202, 205, 210, 214, 217] - self.assertEqual(doc._.vwp_interactives, vwp_interactives) - - def test_propn_interactive(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.propn_interactive, 0.06637168141592921) - - def test_vwp_quoted(self): - doc = holmes_manager.get_document('Aesop') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0] - self.assertEqual(doc._.vwp_quoted, vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('Aesop') - self.assertEqual(doc._.vwp_direct_speech_spans, [[[71, 76, 81], [71, 85], [[62, 72], [73, 87]]], [[210], [214], [[195, 211], [212, 225]]]]) - - def test_propn_direct_speech(self): - doc = 
holmes_manager.get_document('Aesop') - self.assertEqual(doc._.propn_direct_speech, 0.2345132743362832) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('Aesop') - governing_subjects = [None, None, 1, 1, 1, None, None, None, None, 10, 8, 8, 8, None, 13, None, None, 19, 19, None, 19, None, None, 19, None, None, 19, None, 27, None, 27, None, 27, 27, None, 19, 19, None, None, None, 38, None, 48, 48, None, 44, None, None, None, 48, None, 52, 50, 48, 52, None, 57, None, None, 48, None, None, None, None, None, None, None, None, 64, None, 71, None, None, None, None, None, None, 76, None, None, None, None, None, 81, 81, None, None, None, None, None, None, 90, 93, 90, None, 90, None, None, None, None, 98, 98, None, None, None, None, 105, 105, None, 105, 105, None, None, 112, None, None, None, None, 129, None, None, 129, None, 122, 123, None, None, None, None, None, None, 129, 129, None, None, None, None, None, None, 137, None, 146, None, 146, None, None, None, 146, None, None, 149, None, 151, 151, None, None, None, 156, None, None, None, 156, 156, None, None, 164, 164, None, None, None, 181, 181, 181, 181, None, 176, 181, None, 176, None, None, None, 181, None, None, None, 185, None, None, 191, None, None, 191, 191, None, None, None, None, 197, None, None, 200, None, None, 202, None, None, None, 210, None, None, None, None, 214, None, 214, None, None, None, None, None, 219, None, None, None, None] - self.assertEqual(doc._.governing_subjects, - governing_subjects) - - def test_content_segments(self): - doc = holmes_manager.get_document('Aesop') - content_segments = [[0, 61], [62, 87], [87, 194], [195, 212], [212, 226]] - self.assertEqual(doc._.content_segments, - content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('Aesop') - prompt_related = [[5, 2.444390124663897, ['lion', 'paw', 'Lion'], [1, 14, 38, 48, 52, 90, 129, 164, 191, 223]], [4, 1.6241299303944317, ['mouse', 'Mouse'], [19, 71, 98, 112, 156, 210, 219]], 
[9, 1.1996161228406907, ['angrily', 'amused', 'angry', 'voice', 'laugh'], [53, 93, 152, 159, 198]]] - self.assertEqual(doc._.prompt_related, - prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('Aesop') - prompt_language = ['lion', - 'paw', - 'paws', - 'mouse', - 'angry', - 'angrily', - 'final', - 'finally', - 'find', - 'found'] - self.assertEqual(doc._.prompt_language, prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('Aesop') - core_sentences = [] - self.assertEqual(doc._.core_sentences, - core_sentences) - - def test_core_sentences(self): - doc = holmes_manager.get_document('Aesop') - extended_core_sentences = [] - self.assertEqual(doc._.extended_core_sentences, - extended_core_sentences) diff --git a/tests/old_tests/test_viewpoint_perspective_features7.py b/tests/old_tests/test_viewpoint_perspective_features7.py deleted file mode 100644 index 57729aa..0000000 --- a/tests/old_tests/test_viewpoint_perspective_features7.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python3.10 -# Copyright 2022, Educational Testing Service - -import holmes_extractor.manager as holmes -import unittest -from awe_components.components.utility_functions import print_parse_tree -from awe_workbench.pipeline import pipeline_def - -holmes_manager = holmes.Manager( - 'en_core_web_lg', perform_coreference_resolution=False, number_of_workers=2, extra_components=pipeline_def) - -document_text = "She took me by the hand and walked me into the lobby like a five-year old child. Didn’t she know I was pushing 15? This was the third home Nancy was placing me in - in a span of eight months. I guess she felt a little sorry for me. The bright fluorescent lights threatened to burn my skin as I walked towards a bouncy-looking lady with curly hair and a sweetly-smiling man. They called themselves Allie and Alex. 
Cute, I thought.\n\nAfter they exchanged the usual reams of paperwork, it was off in their Chevy Suburban to get situated into another new home. This time, there were no other foster children and no other biological children. Anything could happen.\n\nOver the next few weeks, Allie, Alex, and I fell into quite a nice routine. She’d make pancakes for breakfast, or he’d fry up some sausage and eggs. They sang a lot, even danced as they cooked. They must have just bought the house because, most weekends, we were painting a living room butter yellow or staining a coffee table mocha brown.\n\nI kept waiting for the other shoe to drop. When would they start threatening a loss of pancakes if I didn’t mow the lawn? When would the sausage and eggs be replaced with unidentifiable slosh because he didn’t feel like cooking in the morning? But, it never happened. They kept cooking, singing, and dancing like a couple of happy fools.\n\nIt was a Saturday afternoon when Allie decided it was time to paint the brick fireplace white. As we crawled closer to the dirty old firepit, we pulled out the petrified wood and noticed a teeny, tiny treasure box. We looked at each other in wonder and excitement. She actually said, “I wonder if the leprechauns left it!” While judging her for being such a silly woman, I couldn’t help but laugh and lean into her a little.\n\nTogether, we reached for the box and pulled it out. Inside was a shimmering solitaire ring. Folded underneath was a short piece of paper that read:\n\n“My darling, my heart. Only 80 days have passed since I first held your hand. I simply cannot imagine my next 80 years without you in them. Will you take this ring, take my heart, and build a life with me? This tiny little solitaire is my offering to you. Will you be my bride?”\n\nAs I stared up at Allie, she asked me a question. “Do you know what today is?” I shook my head. “It’s May 20th. 
That’s 80 days since Nancy passed your hand into mine and we took you home.”\n\nIt turns out, love comes in all shapes and sizes, even a teeny, tiny treasure box from a wonderfully silly lady who believes in leprechauns." - -holmes_manager.parse_and_register_document(document_text, label='Personal Narrative') - - -class ViewpointPerspectiveFeatureTest2(unittest.TestCase): - - def test_vwp_perspective(self): - doc = holmes_manager.get_document('Personal Narrative') - perspective_spans = {'implicit': {'30': [29, 30, 31, 32, 33, 46], '36': [34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45], '61': [57, 58, 59, 60, 61, 62, 63, 65, 84], '73': [71, 72, 73], '86': [85, 86, 87, 88, 89, 90, 91], '108': [97, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121], '100': [98, 99, 100, 101, 103, 104, 105], '102': [102], '126': [122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136], '139': [137, 138, 139, 140], '163': [161, 162, 163, 164, 165, 166, 167, 168], '171': [169, 170, 171, 172, 173, 174, 175, 176, 177], '179': [178, 179, 180, 181, 182, 183, 184, 188], '187': [185, 186, 187], '193': [189, 190, 191, 192, 193, 194, 195, 216], '198': [198], '231': [228, 229, 230, 231, 232, 233, 234, 235, 236, 244], '271': [267, 268, 269, 270, 271, 272], '274': [273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287], '393': [392, 393, 394, 395, 396, 397, 398], '401': [399, 400, 401, 402, 403, 404, 405, 406, 409, 410, 411, 413, 414, 416, 417], '408': [407, 408], '422': [418, 419, 420, 421, 422, 429], '465': [461, 462, 463, 464, 465, 467, 468, 470], '508': [506, 507, 508, 509, 510, 511], '513': [512, 513, 514, 515, 523], '518': [516, 517, 518, 520, 521], '532': [530, 531, 532, 533], '536': [534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 554, 559], '553': [552, 553], '556': [555, 556, 557, 558]}, 'explicit_1': [2, 8, 24, 37, 47, 48, 55, 56, 64, 66, 67, 68, 69, 70, 74, 75, 76, 77, 78, 79, 80, 
81, 82, 83, 92, 93, 94, 95, 96, 153, 196, 197, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 237, 238, 239, 240, 241, 242, 243, 295, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 347, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 412, 415, 423, 424, 425, 426, 427, 428, 430, 435, 466, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 501, 502, 503, 504, 505, 522, 524, 525, 526, 527, 528, 529], 'explicit_2': [440, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 469, 471, 472, 473, 474, 475, 476, 477, 494, 519], 'explicit_3': {0: [0, 1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28], 34: [34, 49, 50, 51, 52, 53, 54], 150: [141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 159, 160, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266], 295: [288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 342, 343, 344, 345, 346, 354, 355], 347: [348, 349, 350, 351, 352, 353], 430: [431, 432, 433, 434, 436, 437, 438, 439, 441, 442, 443], 494: [492, 493, 495, 496, 497, 498, 499, 500]}} - self.assertEqual(doc._.vwp_perspective_spans, - perspective_spans) - - def test_vwp_stance_markers(self): - doc = holmes_manager.get_document('Personal Narrative') - stance_markers = {'implicit': {'61': [61], '73': [73], '102': [102], '139': [138], '163': [168], '179': [183], '193': [190, 192], '198': [198], '231': [229, 232, 244], '271': [270], '408': [408], '422': [418], '536': [543], '553': [552, 553], '556': 
[556]}, 'explicit_1': [2, 8, 24, 55, 153, 237, 333, 347, 363, 367, 430, 435], 'explicit_2': [460, 476], 'explicit_3': {0: [28], 150: [145, 158, 246, 266], 295: [343, 354], 347: [349], 430: [431, 432], 494: [499]}} - self.assertEqual(doc._.vwp_stance_markers, - stance_markers) - - def test_propn_egocentric(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.propn_egocentric, 0.2642857142857143) - - def test_propn_allocentric(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.propn_allocentric, 0.18571428571428572) - - def test_propositional_attitudes(self): - doc = holmes_manager.get_document('Personal Narrative') - propositional_attitudes = {'implicit': [[[20, 28], 22, 23], [[122, 136], None, 126], [[122, 136], None, 130], [[137, 140], 137, 139], [[189, 216], 189, 193], [[228, 244], 230, 231], [[245, 266], 248, 252], [[267, 272], 269, 271], [[342, 355], 342, 344], [[356, 378], 366, 369], [[430, 443], 430, 434]], 'implicit_3': [], 'explicit_1': [[[47, 56], 47, 48]], 'explicit_2': [], 'explicit_3': {0: [[[0, 19], 0, 23]], 295: [[[288, 306], 295, 344]]}} - self.assertEqual(doc._.vwp_propositional_attitudes, - propositional_attitudes) - - def test_emotion_states(self): - doc = holmes_manager.get_document('Personal Narrative') - emotion_states = {'explicit_1': [338, 340, 371], 'explicit_2': [], 'explicit_3': {'Nancy': [50, 53], 'Lady': [71], 'Alex and Allie': [179, 184, 277, 280, 285], 'Alex': [260]}} - self.assertEqual(doc._.vwp_emotion_states, - emotion_states) - - def test_character_traits(self): - doc = holmes_manager.get_document('Personal Narrative') - character_traits = {'explicit_1': [], - 'explicit_2': [], - 'explicit_3': {'Alex and Allie': [286], - 'Lady': [553]}} - self.assertEqual(doc._.vwp_character_traits, - character_traits) - - def test_subjectivity_ratings(self): - doc = holmes_manager.get_document('Personal Narrative') - subjectivity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.0, 0.7999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.65, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.45454545454545453, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.375, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.35714285714285715, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 1.0, 0.875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.subjectivity_ratings, subjectivity_ratings) - - def test_mean_subjectivity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.mean_subjectivity, 0.06680634636299661) - - def test_med_subjectivity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.median_subjectivity, 0.0) - - def test_max_subjectivity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.max_subjectivity, 1.0) - - def test_min_subjectivity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.min_subjectivity, 0.0) - - def test_std_subjectivity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.stdev_subjectivity, 0.20841701513602512) - - def test_polarity_ratings(self): - doc = holmes_manager.get_document('Personal Narrative') - polarity_ratings = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1875, -0.5, 0.0, 0.0, 0.0, 0.0, 0.7000000000000001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.35, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.6, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.polarity_ratings, polarity_ratings) - - def test_mean_polarity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.mean_polarity, 0.00824563367666816) - - def test_med_polarity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.median_polarity, 0.0) - - def test_max_polarity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.max_polarity, 1.0) - - def test_min_polarity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.min_polarity, -0.6) - - def test_stdev_polarity(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.stdev_polarity, 0.14514011197364973) - - def test_sentiment_ratings(self): - doc = holmes_manager.get_document('Personal Narrative') - sentiment_ratings = [0, 0, 0, -0.1, 0, 0.22, 0, 0, 0, 0, 0, -0.06, 0.61, 0.04, 0.1, 0, 0.18, -0.45, 0.55, 0, 0, 0, 0, -0.45, -0.19, 0, 0, 0, 0, 0, 0, 0, 6.000000000000001e-05, 0.62, 0, 0, 0, 0, 0, 0, 0, 0.04, 0.18, 0, 0.09, 0, 0, -0.19, 0.04, 0, 0, 0.04, 0.22, -0.04, 0, 0, 0, 0, 0.46, 0.04, 0, -0.6000000000000001, 0, -0.31, 0, 0.19, -0.07, -0.19, 0, 0, 0.04, 0.41000000000000003, 0, 0.1, 0.47000000000000003, 0, 0, 0.29, 0, 0.04, 0.6900000000000001, 0, 0.72, 0.1, 0, 
0, 0, 0, 0, 0, 0, 0, 0.64, 0, -0.19, 0.44, 0, 0, 0, 0, 0.34, 0, 0.12, 0, 0, -0.18, 0, 0, 0, -0.04, 0, 0, -0.07, 0.21, 0, 0.27, 0, 0, -0.07, 0.67, 0.62, 0, 0, 0.15, 0, 0, 0, -0.08, 0.1, 0.18, 0, 0, -0.08, 0.1, 0.39, 0, 0, 0, 0, 0.12, 0, 0, 0.17, 0, 0.00519, 0, 0, 0, 0, 0, 0, 0, 0, -0.19, -0.38, 0, 0.03, 0.04, 0.48, -0.01, 0, 0, 0, 0.27, 0, 0, 0.59, 0, 0, 0, 0, 0.23, 0.34, 0.06, 0.33, 0, 0, 0, 0, 0, 0.04, 0.03, 0, 0.08, 0, -0.07, 0, 0, 0, 0, 0.11, 0.21, 0.39, 0, 0, 0.54, 0, 0, 0, 0, 0, 0, 0, 0.47000000000000003, 0.04, 0.2, 0.13, 0.34, 0.27, 0, 0, 0.04, 0.5, 0.12, 0.38, 0.13, 0, 0, -0.19, -0.34, 0.15, 0, 0, 0.1, 0.19, 0, -0.19, 0, 0, 0, 0, 0.35000000000000003, -0.30000000000000004, 0.04, -0.52, 0, 0, 0, 0.19, 0, 0, 0.01, 0, -0.26, 0, 0, 0, 0, 0.33, 0, 0, 0.29, 0, 0, -0.34, -0.02, 0, 0, 0, 0, -0.31, -0.61, -0.44, 0, 0, -0.35000000000000003, 0, 0, 0, 0, 0.39, 0, 0, 0, -0.34, 0.44, 0, 0, 0, 0, 0, 0.61, 0.04, 0.52, 0, 0.86, 0, 0, 0, 0, 0, 0.04, 0, 0.42, 0, 0, 0.21, 0, 0, 0.15, 0, 0.18, 0, -0.08, 0.23, 0.29, 0, -0.07, 0, 0, 0, 0, 0, -0.45, -0.45, 0, 0, 0, 0, -0.30000000000000004, 0, 0, 0.2, 0, 0.23, 0.04, 0.15, 0, 0.02, 0.66, 0.08, 0, 0, 0, 0, 0, 0.1, 0, 0.42, 0, 0.65, 0, 0, 0.16, 0, 0, 0, -0.19, 0.42, 0, 0, 0, -0.01, 0, 0, 0, 0, 0, 0, 0, 0, 0.07, -0.04, -0.43, -0.52, 0, 0.19, 0, 0, -0.48, 0, -0.64, 0, -0.33, 0, 0, -0.04, -0.22, 0, 0, 0.17, 0, 0, 0, 0, 0, 0.08, 0, 0, 0, -0.30000000000000004, 0, 0.02, 0, 0.04, 0, 0.15, 0.52, 0, 0, 0, 0, 0.04, 0.16, 0.05, 0, 0.1, 0, 0.43, 0, 0, 0, 0, 0.58, 0, 0, 0.48, 0, 0.07, 0, 0, 0.21, 0, 0, -0.19, 0.58, 0, 0, 0.22, 0, 0.19, -0.30000000000000004, -0.35000000000000003, 0.38, -0.55, 0, -0.00519, 0, 0, 0, 0, 0, 0, 0, 0.08, 0, -0.04, 0, 0.52, 0, -0.04, 0, 0.48, 0, 0, 0.33, 0.04, 0.42, 0, 0, 0, 0, 0.02, 0.22, 0.15, 0, 0, 0.12, 0, 0, 0, 0.08, 0, 0.29, 0, 0.45, 0, 0, 0, -0.07, -0.19, 0, 0.34, 0, 0, 0, 0, 0, 0, 0.04, 0.34, 0, 0, 0.1, 0, 0.45, 0, 0.15, 0, 0, 0, -0.19, 0, 0, 0.21, 0, 0, 0, 0, 0.13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.22, 0, -0.02, 0, 
0, 0, 0, 0.62, 0, 0, 0, 0, 0, -0.30000000000000004, 0, 0.75, 0, 0, 0.21, 0, 0, 0, 0, 0.08, 0.04, 0.15, 0, 0.02, 0.66, 0.08, 0, 0.04, 0.6000000000000001, 0.43, 0.47000000000000003, 0, 0, 0, 0, 0] - self.assertEqual(doc._.sentiment_ratings, sentiment_ratings) - - def test_mean_sentiment(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.mean_sentiment, 0.1034975369458128) - - def test_med_sentiment(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.median_sentiment, 0.0) - - def test_max_sentiment(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.max_sentiment, 0.86) - - def test_min_sentiment(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.min_sentiment, -0.64) - - def test_stdev_sentiment(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.stdev_sentiment, 0.28297934666238045) - - def test_tone_ratings(self): - doc = holmes_manager.get_document('Personal Narrative') - tone_ratings = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.06, 0.0, 0.0, 0.0, 0.0, 0.0, -0.45, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, -0.1875, -0.5, 0.0, 0.0, 0.0, 0.0, 0.46, 0.0, 0.0, -0.6000000000000001, 0.0, -0.31, 0.0, 0.0, -0.07, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.35, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, -0.18, 0.0, 0.0, 0.0, -0.04, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.13636363636363635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.08, -0.125, 0.0, 0.0, 0.0, -0.08, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, -0.38, 0.0, 0.0, 0.0, 0.6, -0.01, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.055, 0.0, 0.195, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, -0.34, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, -0.52, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, -0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.34, -0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.39, 0.0, 0.0, 0.0, -0.34, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.08, 0.0, 0.0, 0.0, -0.07, 0.0, 0.0, 0.0, 0.0, 0.0, -0.6, -0.45, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, -0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, 0.0, -0.5, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.035, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.25, 0.0, 0.0, 0.0, 0.0, -0.19, 0.15000000000000002, 0.17500000000000002, -0.38, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.04, 0.0, 0.0, 0.0, -0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.07, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.19, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, -0.30000000000000004, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - self.assertEqual(doc._.tone_ratings, tone_ratings) - - def test_mean_tone(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.mean_tone, -0.01549820868786386) - - def test_med_tone(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.median_tone, 0.0) - - def test_max_tone(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.max_tone, 1.0) - - def test_min_tone(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.min_tone, -0.6000000000000001) - - def test_stdev_tone(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.stdev_tone, 0.16871829461446927) - - def test_vwp_arguments(self): - doc = holmes_manager.get_document('Personal Narrative') - vwp_arguments = [12, 20, 21, 23, 28, 47, 48, 50, 61, 62, 86, 163, 168, 190, 191, 192, 193, 196, 198, 209, 228, 229, 232, 237, 245, 246, 253, 256, 258, 259, 260, 261, 262, 263, 267, 332, 333, 343, 344, 349, 354, 356, 357, 359, 360, 366, 367, 368, 369, 370, 405, 406, 407, 408, 423, 424, 425, 430, 431, 432, 433, 434, 487, 488, 489, 490, 493, 494, 495, 496, 498, 499] - self.assertEqual(doc._.vwp_arguments, vwp_arguments) - - def test_propn_argument_words(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.propn_argument_words, 0.12857142857142856) - - def test_vwp_interactives(self): - doc = holmes_manager.get_document('Personal Narrative') - vwp_interactives = [2, 8, 20, 24, 29, 37, 47, 55, 64, 67, 92, 94, 119, 122, 125, 137, 153, 156, 158, 183, 192, 196, 201, 218, 238, 239, 256, 258, 285, 308, 317, 332, 336, 342, 343, 347, 361, 366, 367, 382, 412, 415, 424, 427, 430, 435, 440, 445, 447, 451, 459, 461, 466, 469, 472, 474, 480, 486, 488, 494, 501, 
503, 512, 519, 522, 524, 526, 543] - self.assertEqual(doc._.vwp_interactives, vwp_interactives) - - def test_propn_interactive(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.propn_interactive, 0.12142857142857143) - - def test_vwp_quoted(self): - doc = holmes_manager.get_document('Personal Narrative') - vwp_quoted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - self.assertEqual(doc._.vwp_quoted, vwp_quoted) - - def test_vwp_direct_speech_spans(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.vwp_direct_speech_spans, [[[94], [], [[92, 96]]], [[295, 342, 347], [], [[342, 355]]], [[404, 412, 415, 424, 430, 435, 451, 459, 466, 474], [427, 440, 445, 469, 472], [[402, 416], [411, 416], [418, 429], [430, 443], [444, 460], [461, 470], [471, 477]]], [[484, 522, 524], [488, 494, 519, 526], [[478, 491], [492, 500], [506, 511], [512, 529]]]]) - - def test_propn_direct_speech(self): - doc = holmes_manager.get_document('Personal Narrative') - self.assertEqual(doc._.propn_direct_speech, 0.19285714285714287) - - def test_governing_subjects(self): - doc = holmes_manager.get_document('Personal Narrative') - governing_subjects = [None, 0, None, 2, None, None, None, 0, None, 8, None, None, 8, None, None, None, 8, 8, 8, None, None, None, None, 22, None, None, 24, None, None, None, 29, None, 29, 29, None, None, 34, None, 37, None, 37, None, 37, 37, None, 37, None, None, 47, None, 49, None, None, 49, 49, None, None, None, 60, None, None, 60, None, 60, None, 64, None, None, 67, 67, None, 74, None, 74, None, None, 77, None, None, None, None, None, None, None, None, None, 85, None, 87, None, 87, None, None, None, None, 94, None, None, None, None, 99, None, None, None, None, None, None, None, 107, 107, 107, None, None, 111, None, None, 107, 107, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 135, 135, None, None, None, None, 137, None, None, 148, None, 148, 148, 148, None, None, None, None, None, None, None, 148, 148, None, None, None, None, None, None, None, 161, None, 164, None, None, None, None, None, 169, None, None, None, None, None, None, None, 178, None, None, None, 178, 178, None, None, 186, None, None, None, None, 189, 189, None, None, None, None, None, None, None, None, None, 201, None, None, None, None, None, 
None, 201, None, None, None, None, None, None, None, None, 218, 218, None, None, 224, None, None, 224, None, None, None, None, 230, 230, None, None, None, None, None, None, None, None, 238, None, None, None, None, None, None, None, None, None, None, 248, 248, None, None, None, None, None, None, 257, 257, 257, 257, None, 257, None, None, None, None, 269, 269, None, None, 273, 273, None, 273, None, None, 273, 273, None, 273, 273, 273, 273, None, None, None, 289, None, None, 289, None, None, 295, None, 297, 297, None, None, None, None, None, 304, None, None, None, 308, 308, 308, None, 315, 315, None, None, None, 317, None, None, None, None, None, 317, None, 330, None, 330, None, None, None, None, 332, 332, None, 332, 332, 332, None, 332, None, None, 342, 342, None, None, None, 347, None, None, None, 351, None, None, None, None, 366, None, 358, None, None, None, None, None, None, None, None, None, 366, None, 366, None, 366, 366, None, None, None, None, None, 382, None, None, 382, 382, None, None, None, 382, None, None, None, None, None, None, None, None, None, None, None, None, 399, None, 399, 399, 399, 399, None, 404, None, None, None, None, 412, None, None, 415, None, None, None, None, None, 420, None, None, 424, 424, None, 427, None, None, 430, None, None, 430, None, 435, None, 435, 435, None, 438, None, None, None, None, 445, None, None, None, 445, None, 451, None, None, 445, None, None, 457, None, None, None, 464, 464, None, 464, None, 466, 466, None, None, None, None, 472, None, 474, None, None, None, None, None, 480, None, 480, None, None, None, 486, None, None, None, None, None, None, None, 494, 497, None, 497, None, None, None, 501, None, 503, None, None, None, 507, None, 507, None, None, 512, None, 512, None, None, 517, None, 519, 520, None, None, None, 524, None, 524, None, None, None, None, 531, None, None, None, 535, None, None, None, None, None, None, None, None, 549, None, 549, None, None, None, None, 553, 554, None, None, 554, 554, None, None] - 
self.assertEqual(doc._.governing_subjects, governing_subjects) - - def test_content_segments(self): - doc = holmes_manager.get_document('Personal Narrative') - content_segments = [[0, 20], [20, 29], [29, 47], [47, 57], [57, 85], [85, 92], [92, 97], [97, 122], [122, 137], [137, 141], [141, 287], [288, 392], [392, 399], [399, 418], [418, 430], [430, 444], [444, 461], [461, 471], [471, 478], [478, 492], [492, 560]] - self.assertEqual(doc._.content_segments, content_segments) - - def test_prompt_related(self): - doc = holmes_manager.get_document('Personal Narrative') - prompt_related = [[4, 3.0605871330418486, ['walk', 'keep', 'crawl', 'pull', 'notice', 'look', 'reach', 'pass', 'stare', 'shake'], [7, 68, 219, 274, 309, 318, 324, 333, 383, 388, 422, 481, 502, 518]], [22, 2.127659574468085, ['Nancy', 'Allie', 'Alex'], [34, 88, 90, 148, 150, 295, 484, 517]], [19, 1.5341264871634313, ['sorry', 'happy', 'fool', 'silly', 'laugh', 'darling'], [53, 285, 286, 363, 371, 413, 553]]] - self.assertEqual(doc._.prompt_related, prompt_related) - - def test_prompt_language(self): - doc = holmes_manager.get_document('Personal Narrative') - prompt_language = ['walk', 'walked', 'keep', 'kept', 'pull', 'pulled', 'look', 'looked', 'actual', 'actually', 'pass', 'passed', 'first'] - self.assertEqual(doc._.prompt_language, prompt_language) - - def test_core_sentences(self): - doc = holmes_manager.get_document('Personal Narrative') - core_sentences = [] - self.assertEqual(doc._.core_sentences, core_sentences) From 826f93e0102bb4b75e455fc157b4855abd695af8 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 7 Nov 2024 15:16:43 -0500 Subject: [PATCH 34/39] Updated test name; requirements; docs --- README.md | 103 ++++++++---------- requirements.txt | 8 +- setup.cfg | 4 + ...t_awe_nlp_no_holmes.py => test_awe_nlp.py} | 0 4 files changed, 52 insertions(+), 63 deletions(-) rename tests/{test_awe_nlp_no_holmes.py => test_awe_nlp.py} (100%) diff --git a/README.md b/README.md index 89265cc..0f8b8dc 
100755 --- a/README.md +++ b/README.md @@ -1,90 +1,79 @@ -================= -INSTALLATION -================= +# AWE Workbench -To install: +This project serves as a workbench for an open-source natural language processing pipeline designed to support automated writing evaluation (AWE) – automated scoring and feedback of student essays and other educational materials. -Set up Python 3.9. 3.8 will *not* work +## AWE Project Dependencies -I have tested installation using conda. My set up sequence for the -environment looks like this: +This project installs 4 other dependencies (more info on their individual repositories below): -conda create -n test_install python=3.9 pip -pip install pip --upgrade -conda activate test_install +* [AWE_Components](https://github.com/ArgLab/AWE_Components) +* [AWE_Lexica](https://github.com/ArgLab/AWE_Lexica) +* [AWE_LanguageTool](https://github.com/ArgLab/AWE_LanguageTool) +* [AWE_SpellCorrect](https://github.com/ArgLab/AWE_SpellCorrect) -Then from the AWE Workbench directory: +## Installation - pip install . ('pip install -e .' if installing in development mode) - python -m awe_workbench.setup.data --install (--develop if installing in development mode) +Before installing, make sure you have a Python (3.11) virtual environment (venv, conda, etc.). -You MUST run the setup.data script before running the program. -Incidentally, we don't yet have the package registered with pip, so you can't simply pip install awe_workbench yet. +Once you have set up your environment, ensure you are in the root directory of this project, then run: -Basic usage to get the program running as a set of servers, which can then be called to analyze one or more texts: +```bash +# Install Workbench from root directory +pip install -e . -python -m awe_workbench.web.startServers - (use the -fp flag to deactivate a couple of resource-intensive functions, - including the coreference module, coreferee). 
-python -m awe_components.wordprobs.wordseqProbabilityServer - (only currently used if coreferee is called, so you don't need to start - this module if the -fp flag is used in the previous call.) - -Examples of how to send queries to the server are provided in the AWE Workbench/examples directory. - -================= -OVERVIEW -================= - -What is the AWE Workbench? +# Develop flag is to be used if in development mode +python -m awe_workbench.setup.data --[install/develop] +``` -This package is an open-source natural language processing pipeline designed to support automated writing evaluation (AWE) – automated scoring and feedback of student essays and other educational materials. As such, it is intended to identify text features that may be needed +## Running Workbench & Tests -· To evaluate the quality of student writing +Before running any tests, ensure that you run the main and WordSeqProbability servers: -· To visualize significant features of student or model texts +```bash +# Main Server +python -m awe_workbench.web.startServers -· To analyze text content +# wordSeqProbabilityServer +python -m awe_components.wordprobs.wordseqProbabilityServer +``` -· To provide instructional feedback +After this, you can run the main suite of tests: -The AWE Workbench draws upon several open-source packages widely used in the NLP community. These include LanguageTool, an open source grammar/spell checker, the Spacy parser, two Spacy modules (Coreferee, for coreference resolution, and Holmes Extractor, for content matching and topic analysis), an instance of the BERT deep learning transformer supplied by HuggingFace, the open source spell-correction modules PySymSpell and Neuspell, and various lexical resources including WordNet, the Wordfreq word frequency module, the Morpholex morphology database, several academic word lists, two different sentiment lexicons, and a number of custom lexicons. 
Details for the lexical resources included in the AWE Workbench are noted in the license file. The AWE Workbench integrates these resources and provides a number of additional facilities for NLP analysis of student texts, as outlined below. +```bash +pytest tests/test_awe_nlp.py +``` -The AWE Workbench is designed to support analysis of the linguistic features of texts. As such, it is similar to various existing software tools (see, for example, https://www.linguisticanalysistools.org/tools.html), but at the time of writing it is the only general, open-source AWE software package of which the authors are aware that is available under the GNU Affero General Public License, rather than more restrictive licenses like the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license. Currently, the package only supports English, though many of the modules included in it are multilingual. The authors welcome participation from members of the open-source community who may wish to support further development of the AWE Workbench (in English, or by extending it to other languages). We hope that it will help researchers and developers implement high-quality, state-of-the-at automated writing evaluation tools and applications. +## Package Structure -========================================= -Overall Structure of the Package -========================================= +There are 4 services provided by Workbench: -The AWE Workbench provides four basic services: +1. A LanguageTool wrapper. LanguageTool identifies grammar, usage, mechanics, and style errors, and provides feedback text that can be displayed to the user. Our LanguageTool wrapper provides an additional layer of classification that makes it easier to identify errors that address the same specific construct, such as subject/verb agreement within grammar, or likely typos within spelling. -· A LanguageTool wrapper. 
LanguageTool identifies grammar, usage, mechanics, and style errors, and provides feedback text that can be displayed to the user. Our LanguageTool wrapper provides an additional layer of classification that makes it easier to identify errors that address the same specific construct, such as subject/verb agreement within grammar, or likely types within spelling. +2. A spelling correction module that incorporates PySymSpell and Neuspell. This module is specifically designed to be used to standardize the spelling of student texts written on a common topic, so that they can be subjected to further NLP analysis. It works best when applied to a corpus of student texts that reflects a range of spelling abilities, so that it can infer the correct spellings of words commonly used to address a specific assignment. When this information is not available, or is not sufficient, it falls back on the spell-correction facilities provided by Neuspell, a state-of-the-art transformer-based spell-corrector. -· A spelling correction module that incorporates PySymSpell and Neuspell. This module is specifically designed to be used to standardize the spelling of student texts written on a common topic, so that they can be subjected to further NLP analysis. It works best when applied to a corpus of student texts that reflects a range of spelling abilities, so that it can infer the correct spellings of words commonly used to address a specific assignment. When this information is not available, or is not sufficient, it falls back on the spell-correction facilities provided by Neuspell, a state-of-the-art transformer-based spell-corrector. +3. A wrapper for the BERT transformer that allows the user to extract the probability of words in context. BERT can, of course, be used independently to build a variety of classifiers, though currently the AWE Workbench uses it only in a few, relatively limited contexts. 
-· A wrapper for the BERT transformer that allows the user to extract the probability of words in context. BERT can, of course, be used independently to build a variety of classifiers, though currently the AWE Workbench uses it only in a few, relatively limited contexts. +4. A natural language processing (NLP) pipeline built around the Spacy parser. In addition to the Coreferee and Holmes Extractor modules, this pipeline includes custom components. -· A natural language processing (NLP) pipeline built around the Spacy parser. In addition to the Coreferee and Holmes Extractor modules, this pipeline includes the following custom components: +### Custom Components to NLP pipeline -o A lexical feature component, which calculates a variety of word-based features, including token, lemma and root counts and frequencies, size of word families, syllable counts, word length, latinate and academic vocabulary status, number of word senses, and measures of animacy, abstractness/concreteness, sentiment and subjectivity. +* A lexical feature component, which calculates a variety of word-based features, including token, lemma and root counts and frequencies, size of word families, syllable counts, word length, latinate and academic vocabulary status, number of word senses, and measures of animacy, abstractness/concreteness, sentiment and subjectivity. -o A lexical cluster component, which provides an agglomerative clustering of the words that appear within a document using Spacy word vectors. These clusters are used to support a number of other statistics, such as measures of the distribution of the largest clusters (which are likely to reflect the primary topic of the document) and of the difficulty of the vocabulary that appears in other, smaller clusters (which are likely to reflect development of secondary topics). +* A lexical cluster component, which provides an agglomerative clustering of the words that appear within a document using Spacy word vectors. 
These clusters are used to support a number of other statistics, such as measures of the distribution of the largest clusters (which are likely to reflect the primary topic of the document) and of the difficulty of the vocabulary that appears in other, smaller clusters (which are likely to reflect development of secondary topics). -o A syntax and discourse feature component, which provides measures of the number and length of sentences and paragraphs, the number and types of transition words used to mark discourse segments, and the number and length of pronominal coreference chains; measures of syntactic complexity and variety such as depth of embedding and the number of tags and types of grammatical dependencies deployed in a text, and measures of textual coherence, such as the cosine similarity of content words in adjacent sentences or across paragraph boundaries and other discourse transitions. +* A syntax and discourse feature component, which provides measures of the number and length of sentences and paragraphs, the number and types of transition words used to mark discourse segments, and the number and length of pronominal coreference chains; measures of syntactic complexity and variety such as depth of embedding and the number of tags and types of grammatical dependencies deployed in a text, and measures of textual coherence, such as the cosine similarity of content words in adjacent sentences or across paragraph boundaries and other discourse transitions. -o A viewpoint feature component, which identifies viewpoint predicates, such as emotion, cognition, and perception words, stance markers, which indicate the subjective perspective of the writer, and markers of direct and indirect speech. The viewpoint feature component uses this information to determine what parts of a text are to be evaluated as reflecting a specific viewpoint. 
The resulting features are used to support various genre-specific analyses, such as identification of the parts of a text that contain argumentation, or which contain references to the actions and mental states of story characters. +* A viewpoint feature component, which identifies viewpoint predicates, such as emotion, cognition, and perception words, stance markers, which indicate the subjective perspective of the writer, and markers of direct and indirect speech. The viewpoint feature component uses this information to determine what parts of a text are to be evaluated as reflecting a specific viewpoint. The resulting features are used to support various genre-specific analyses, such as identification of the parts of a text that contain argumentation, or which contain references to the actions and mental states of story characters. -o A content segmentation component, which identifies major content blocks marked by chains of repeated or related words, and which determines whether individual sentences have significant content that address the main ideas of an essay and/or overlap with specified prompt language. +* A content segmentation component, which identifies major content blocks marked by chains of repeated or related words, and which determines whether individual sentences have significant content that address the main ideas of an essay and/or overlap with specified prompt language. These modules are by design rule-based, rather than statistical in nature, and intended to capture features of the text that can be explicitly identified and labeled using linguistic knowledge. They capture dimensions that have been established as relevant to essay quality and structure in the research literature, which can be an important consideration when building educational applications. These criteria led to the exclusion of some forms of text analysis, such as rhetorical-structure parsing, which depend critically on a statistical model. 
However, the linguistic features supported by the AWE workbench include most of the surface cues that such models exploit. The outputs created by the AWE Workbench can easily be used as inputs to more sophisticated, statistical classifiers, but if used without modification, they are intended to provide a strong baseline for analyzing student texts. It is important to note that while the features deployed in the AWE Workbench may bear a family resemblance to features deployed in commercial AWE systems, they were for the most part selected because they capture what patent law would recognize as prior art – well-known, long-established methods for analyzing student texts using natural language processing techniques. Places where the AWE Workbench contains novel contributions are identified below. Such contributions may be subject to patent applications filed by the authors, but are nonetheless released for use under the terms of the Gnu Affero public license. -Also note that we include a simple server API, to support use cases where the AWE Workbench needs to run in a distributed environemtn. +Also note that we include a simple server API, to support use cases where the AWE Workbench needs to run in a distributed environment. -========================================== -Contributions that Go Beyond Prior Art -========================================== +## Contributions While largely based on prior art, the AWE Workbench does contain several significant innovations, which are outlined below. @@ -92,10 +81,6 @@ Open-Source Concreteness Estimates. The largest, most reliable databases of lexi The ETS Viewpoint Lexicon. The AWE Workbench defines an approach to subjectivity and stance-taking that depends on a fine-grained lexical classification of cognition, perception, emotion, and communication predicates, developed by the authors as part of IES grant R205A210297. 
Using this classification, the AWE Workbench uses syntactic dependencies identified by the Spacy parser to locate noun phrases and clauses that define the viewpoints attributed to these predicates and the propositions to which those viewpoints apply. This makes it possible to identify explicit argument language, important features of narrative, and other aspects of text structure that depend upon stance and viewpoint. As such, it covers similar ground to such widely-used resources as the MPQA argument lexicon, but in greater depth, and follows a design philosophy similar to that defended by St. Dizier (2020). We are currently validating the use of this lexicon. However, its results appear to be accurate enough to use to identify potential argument or narrative language in a text. -============================ -Applications -============================ +## Applications Like most state-of-the-art AWE systems, the AWE Workbench can be used to extract summary features that can be used to predict human scores or classify student responses into meaningful categories. We are currently validating these uses with publicly available datasets. However, since equivalent features to those deployed by the AWE Workbench have already been validated elsewhere, the user can proceed with similar applications in advance of our completion of this effort. We will include a script for extracting summary features with the distribution we are preparing for public release. 
- - diff --git a/requirements.txt b/requirements.txt index 8928b2e..7cdeef9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -awe_languagetool -awe_spellcorrect -awe_components -awe_lexica +AWE_SpellCorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git +AWE_Components @ git+https://github.com/ArgLab/AWE_Components.git +AWE_Lexica @ git+https://github.com/ArgLab/AWE_Lexica.git +AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git holmes_extractor coreferee spacy diff --git a/setup.cfg b/setup.cfg index 1a3dac4..482e3b5 100755 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,10 @@ include_package_data = True packages = find: python_requires = >=3.9 install_requires = + AWE_SpellCorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git + AWE_Components @ git+https://github.com/ArgLab/AWE_Components.git + AWE_Lexica @ git+https://github.com/ArgLab/AWE_Lexica.git + AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git awe_languagetool awe_spellcorrect awe_components diff --git a/tests/test_awe_nlp_no_holmes.py b/tests/test_awe_nlp.py similarity index 100% rename from tests/test_awe_nlp_no_holmes.py rename to tests/test_awe_nlp.py From db9ca1e6e176ae053e7ded4fc57f9f6b6bdcb7fe Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 7 Nov 2024 15:24:04 -0500 Subject: [PATCH 35/39] Update old requirements --- requirements.txt | 41 ++++------------------------------------- setup.cfg | 6 +----- 2 files changed, 5 insertions(+), 42 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7cdeef9..e0b98a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,46 +1,13 @@ +AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git AWE_SpellCorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git AWE_Components @ git+https://github.com/ArgLab/AWE_Components.git AWE_Lexica @ git+https://github.com/ArgLab/AWE_Lexica.git -AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git 
-holmes_extractor -coreferee -spacy -spacytextblob <= 3.0.1 -tensorflow -scipy -sklearn -bs4 -rdflib -jsonpickle -msgpack-numpy -falcon -torch -wordfreq -pygtrie -symspellpy -autocorrect -neuspell -nltk -numpy -aenum -statistics -srsly websockets websocket-client aiohttp -absl-py -keras -keras-nightly -keras-preprocessing -tensorboard-data-server -h5py -typing-extensions -six -clint requests +streamlit +st-annotated-text names openpyxl -pandas -streamlit -st-annotated_text -protobuf +tensorflow diff --git a/setup.cfg b/setup.cfg index 482e3b5..da23610 100755 --- a/setup.cfg +++ b/setup.cfg @@ -29,14 +29,10 @@ include_package_data = True packages = find: python_requires = >=3.9 install_requires = + AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git AWE_SpellCorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git AWE_Components @ git+https://github.com/ArgLab/AWE_Components.git AWE_Lexica @ git+https://github.com/ArgLab/AWE_Lexica.git - AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git - awe_languagetool - awe_spellcorrect - awe_components - awe_lexica websockets websocket-client aiohttp From 1328501fe5c76416c8719ca98c7343235737097f Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 7 Nov 2024 16:03:35 -0500 Subject: [PATCH 36/39] Removed lexica to install; components already does this --- requirements.txt | 7 +++---- setup.cfg | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index e0b98a9..f292564 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ -AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git -AWE_SpellCorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git -AWE_Components @ git+https://github.com/ArgLab/AWE_Components.git -AWE_Lexica @ git+https://github.com/ArgLab/AWE_Lexica.git +awe_languagetool @ git+https://github.com/ArgLab/AWE_LanguageTool.git +awe_spellcorrect @ 
git+https://github.com/ArgLab/AWE_SpellCorrect.git +awe_components @ git+https://github.com/ArgLab/AWE_Components.git websockets websocket-client aiohttp diff --git a/setup.cfg b/setup.cfg index da23610..421d681 100755 --- a/setup.cfg +++ b/setup.cfg @@ -29,10 +29,9 @@ include_package_data = True packages = find: python_requires = >=3.9 install_requires = - AWE_LanguageTool @ git+https://github.com/ArgLab/AWE_LanguageTool.git - AWE_SpellCorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git - AWE_Components @ git+https://github.com/ArgLab/AWE_Components.git - AWE_Lexica @ git+https://github.com/ArgLab/AWE_Lexica.git + awe_languagetool @ git+https://github.com/ArgLab/AWE_LanguageTool.git + awe_spellcorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git + awe_components @ git+https://github.com/ArgLab/AWE_Components.git websockets websocket-client aiohttp From f55cd5619675ae5e2d83da01ab3e177605d9dc11 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 14 Nov 2024 21:02:37 -0500 Subject: [PATCH 37/39] Removed old parser server code --- awe_workbench/web/oldParserServer.py | 1514 -------------------------- setup.cfg | 4 +- 2 files changed, 2 insertions(+), 1516 deletions(-) delete mode 100644 awe_workbench/web/oldParserServer.py diff --git a/awe_workbench/web/oldParserServer.py b/awe_workbench/web/oldParserServer.py deleted file mode 100644 index 29ef26d..0000000 --- a/awe_workbench/web/oldParserServer.py +++ /dev/null @@ -1,1514 +0,0 @@ -""" ---- [ Test: parserServer.py ] ----------------------------------------------------------- - -Main server for parsing commands regarding spacy pipeline. - -@8/20/2024: modifications made to function without the use of holmes-extractor; this is -to make AWE_Workbench far easier to manage regarding dependency issues. 
- -Author: Caleb Scott (cwscott3@ncsu.edu) - -Copyright 2022, Educational Testing Service - ------------------------------------------------------------------------------------------ -""" - -# --- [ IMPORTS ] ----------------------------------------------------------------------- - -# Basic lib imports -import asyncio -import base64 -import websockets -import json -import spacy -import coreferee -import spacytextblob.spacytextblob - -# AWE imports -from awe_components.components.utility_functions import content_pos -import awe_components.components.lexicalFeatures -import awe_components.components.syntaxDiscourseFeats -import awe_components.components.viewpointFeatures -import awe_components.components.lexicalClusters -import awe_components.components.contentSegmentation -from awe_workbench.pipeline import pipeline_def - -# --- [ CONSTS/VARS ] ------------------------------------------------------------------- - -HOST = 'localhost' -PORT = 8766 -MAX_DATA_LIMIT = 2 ** 24 -SPACY_MODEL = 'en_core_web_lg' -COMPONENTS = [el['component'] for el in pipeline_def] -AWE_INFO_KEYS = ['indicator', 'infoType', 'summaryType', 'filters', 'transformations'] - -# --- [ CLASSES ] ----------------------------------------------------------------------- - -class parserServer: - - def __init__(self): - - # Set up the NLP pipeline - print("initializing NLP pipeline...") - try: - self.nlp = spacy.load(SPACY_MODEL) - for comp in COMPONENTS: - self.nlp.add_pipe(comp) - except OSError as e: - print("There was an error loading 'en_core_web_lg' from spacy.") - raise OSError() from e - - # Instead of using holmes, we save the docs in memory - self.docs = {} - self.partial = "" - - # Start the event loop, and run until the kill command - print("starting event loop -- use [KILL] command to terminate.") - asyncio.get_event_loop().run_until_complete( - websockets.serve(self.run_parser, HOST, PORT, max_size=MAX_DATA_LIMIT)) - print('parser server running...') - asyncio.get_event_loop().run_forever() 
- print('parser server terminated...') - - async def kill(self, websocket): - """ - Command called to kill the parser server. - """ - self.parser.close() - await websocket.send(json.dumps(True)) - await websocket.close() - exit() - - def clear_parsed(self): - """ - Resets the document store to an empty mapping. - """ - self.docs = {} - return True - - def remove(self, label): - """ - Removes a document from the document store. - """ - del self.docs[label] - return True - - def parse_one(self, label, text): - """ - Parses a single document, and adds it to the document store. - - NOTE: we overwrite documents with the same key. - """ - self.docs[label] = self.partial + self.nlp(text) - self.partial = "" - print(f"parsed document: {label}") - return True - - def partial_text(self, text): - """ - Adds partial text to be processed in the future. - """ - self.partial += text - return True - - def parse_set(self, doc_set): - """ - Parses a document list of tuples (labels, text). - """ - for label, text in doc_set: - self.parse_one(label, text) - return True - - def labels(self): - """ - Returns a list of all document labels. - """ - return list(self.docs.keys()) - - def serialized(self, label): - """ - Returns a serialized document, selected by label - """ - return base64.b64encode(self.docs[label]) - - def new_search_phrase(self): - pass - - def remove_labeled_search(self): - pass - - def clear_searches(self): - pass - - def show_search_labels(self): - pass - - def match_documents(self): - pass - - def frequencies(self): - pass - - def topic_matches(self): - pass - - def awe_info(self, label, *args): - """ - Returns information specified in an AWE_Info object. 
- - This information is determined by: - * indic - indicator name - * itype - information type - * summ - summary type - * filt - filters - * trans - transformations - """ - doc = self.docs[label] - kwargs = {} - - # Get the appropriate arguments for AWE_Info - # Since we have a list of values, we need to map them first - for i, val in enumerate(args): - kwargs[AWE_INFO_KEYS[i]] = val - if not kwargs: - return None - else: - return doc._.AWE_Info(**kwargs) - - def fast_map_awe_info(self, command): - """ - Maps to awe_info(), given a simple command - """ - pass - - def doc_heads(self, label): - """ - Returns list of token heads for a given document. - """ - doc = self.docs[label] - return [token.head.i for token in doc] - - def pos(self, label): - """ - Returns positions of tokens for a given document. - """ - doc = self.docs[label] - return [token.pos_ for token in doc] - - def doc_dependencies(self, label): - """ - Returns dependencies of tokens for a given document. - """ - doc = self.docs[label] - return [token.dep_ for token in doc] - - def doc_entities(self, label): - """ - Returns all entities for a given document. - """ - doc = self.docs[label] - return [ - [ - ent.text, - ent.start_char, - ent.end_char, - ent.label_ - ] for ent in doc.ents - ] - - def tok_vecs(self, label): - """ - Returns token vectors for a given document. 
- """ - doc = self.docs[label] - return doc._.token_vectors - - summaryLabels = [ - 'mean_nSyll', - 'med_nSyll', - 'max_nSyll', - 'min_nSyll', - 'std_nSyll', - 'mean_sqnChars', - 'med_sqnChars', - 'max_sqnChars', - 'min_sqnChars', - 'std_sqnChars', - 'propn_latinate', - 'propn_academic', - 'mean_family_size', - 'med_family_size', - 'max_family_size', - 'min_family_size', - 'std_family_size', - 'mean_concreteness', - 'med_concreteness', - 'max_concreteness', - 'min_concreteness', - 'std_concreteness', - 'mean_logNSenses', - 'med_logNSenses', - 'max_logNSenses', - 'min_logNSenses', - 'std_logNSenses', - 'mean_nMorph', - 'med_nMorph', - 'max_nMorph', - 'min_nMorph', - 'std_nMorph', - 'mean_logfreq_HAL', - 'med_logfreq_HAL', - 'max_logfreq_HAL', - 'min_logfreq_HAL', - 'std_logfreq_HAL', - 'mean_root_fam_size', - 'med_root_fam_size', - 'max_root_fam_size', - 'min_root_fam_size', - 'std_root_fam_size', - 'mean_root_pfmf', - 'med_root_pfmf', - 'max_root_pfmf', - 'min_root_pfmf', - 'std_root_pfmf', - 'mean_token_frequency', - 'median_token_frequency', - 'max_token_frequency', - 'min_token_frequency', - 'std_token_frequency', - 'mean_lemma_frequency', - 'median_lemma_frequency', - 'max_lemma_frequency', - 'min_lemma_frequency', - 'std_lemma_frequency', - 'mean_max_frequency', - 'median_max_frequency', - 'max_max_frequency', - 'min_max_frequency', - 'std_max_frequency', - 'propn_abstract_traits', - 'propn_animates', - 'propn_deictics', - 'wf_type_count', - 'lemma_type_count', - 'type_count', - 'token_count', - 'paragraph_count', - 'mean_paragraph_length', - 'median_paragraph_length', - 'max_paragraph_length', - 'min_paragraph_length', - 'stdev_paragraph_length', - 'propn_transition_words', - 'transition_category_count', - 'transition_word_type_count', - 'mean_transition_distance', - 'median_transition_distance', - 'max_transition_distance', - 'min_transition_distance', - 'stdev_transition_distance', - 'mean_sent_cohesion', - 'median_sent_cohesion', - 'max_sent_cohesion', - 
'min_sent_cohesion', - 'stdev_sent_cohesion', - 'mean_slider_cohesion', - 'median_slider_cohesion', - 'max_slider_cohesion', - 'min_slider_cohesion', - 'stdev_slider_cohesion', - 'num_corefs', - 'mean_coref_chain_len', - 'median_coref_chain_len', - 'max_coref_chain_len', - 'min_coref_chain_len', - 'stdev_coref_chain_len', - 'sentence_count', - 'mean_sentence_len', - 'median_sentence_len', - 'max_sentence_len', - 'min_sentence_len', - 'std_sentence_len', - 'mean_words_to_sentence_root', - 'median_words_to_sentence_root', - 'max_words_to_sentence_root', - 'min_words_to_sentence_root', - 'stdev_words_to_sentence_root', - 'meanRhemeDepth', - 'medianRhemeDepth', - 'maxRhemeDepth', - 'minRhemeDepth', - 'stdevRhemeDepth', - 'meanThemeDepth', - 'medianThemeDepth', - 'maxThemeDepth', - 'minThemeDepth', - 'stdevThemeDepth', - 'meanWeightedDepth', - 'medianWeightedDepth', - 'maxWeightedDepth', - 'minWeightedDepth', - 'stdevWeightedDepth', - 'meanWeightedBreadth', - 'medianWeightedBreadth', - 'maxWeightedBreadth', - 'minWeightedBreadth', - 'stdevWeightedBreadth', - 'syntacticVariety', - 'propn_past', - 'propn_argument_words', - 'propn_direct_speech', - 'propn_egocentric', - 'propn_allocentric', - 'mean_subjectivity', - 'median_subjectivity', - 'min_subjectivity', - 'max_subjectivity', - 'stdev_subjectivity', - 'mean_polarity', - 'median_polarity', - 'min_polarity', - 'max_polarity', - 'stdev_polarity', - 'mean_sentiment', - 'median_sentiment', - 'min_sentiment', - 'max_sentiment', - 'stdev_sentiment', - 'mean_main_cluster_span', - 'median_main_cluster_span', - 'min_main_cluster_span', - 'max_main_cluster_span', - 'stdev_main_cluster_span', - 'propn_devwords', - 'mean_devword_nsyll', - 'median_devword_nsyll', - 'min_devword_nsyll', - 'max_devword_nsyll', - 'stdev_devword_nsyll', - 'mean_devword_nmorph', - 'median_devword_nmorph', - 'min_devword_nmorph', - 'max_devword_nmorph', - 'stdev_devword_nmorph', - 'mean_devword_nsenses', - 'median_devword_nsenses', - 
'min_devword_nsenses', - 'max_devword_nsenses', - 'stdev_devword_nsenses', - 'mean_devword_token_freq', - 'median_devword_token_freq', - 'min_devword_token_freq', - 'max_devword_token_freq', - 'stdev_devword_token_freq', - 'mean_devword_concreteness', - 'median_devword_concreteness', - 'min_devword_concreteness', - 'max_devword_concreteness', - 'stdev_devword_concreteness' - ] - - async def run_parser(self, websocket, path): - current_doc = '' - async for message in websocket: - - messagelist = json.loads(message) - print(messagelist) - command = '' - if messagelist[0] == 'KILL': - command = 'KILL' - await websocket.send(json.dumps(True)) - await self.kill(websocket) - elif messagelist[0] == 'CLEARPARSED': - command = 'CLEARPARSED' - self.parser.remove_all_documents() - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'REMOVE': - command = 'REMOVE' - label = messagelist[1] - self.parser.remove_document(label) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'PARSEONE': - command = 'PARSEONE' - label = messagelist[1] - text = current_doc + messagelist[2] - current_doc = '' - if label in self.parser.list_document_labels(): - self.parser.remove_document(label) - self.parser.parse_and_register_document(text, label) - doc = self.parser.get_document(label) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'PARTIALTEXT': - current_document += messagelist[2] - elif messagelist[0] == 'PARSESET': - command = 'PARSESET' - results = [] - [labels, texts] = messagelist[1] - for i, text in enumerate(texts): - text = texts[i] - print('parsed document', str(i+1), 'of', len(texts)) - if text is not None and len(text) > 0: - if labels[i] in self.parser.list_document_labels(): - self.parser.remove_document(labels[i]) - self.parser.parse_and_register_document( - text, labels[i]) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'LABELS': - command = 'LABELS' - labels = self.parser.list_document_labels() - await 
websocket.send(json.dumps(labels)) - elif messagelist[0] == 'SERIALIZED': - command = 'SERIALIZED' - label = messagelist[1] - serialized = base64.b64encode( - self.parser.serialize_document(label)) - await websocket.send(serialized) - elif messagelist[0] == 'NEWSEARCHPHRASE': - command = 'NEWSEARCHPHRASE' - search_phrase_text = messagelist[1] - label = messagelist[2] - ok = self.parser.register_search_phrase(search_phrase_text) - await websocket.send(ok) - elif messagelist[0] == 'REMOVELABELEDSEARCH': - command = 'REMOVELABELEDSEARCH' - label = messagelist[1] - self.parser.remove_all_search_phrases_with_label(label) - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'CLEARSEARCHES': - command = 'CLEARSEARCHES' - self.parser.remove_all_search_phrases() - await websocket.send(json.dumps(True)) - elif messagelist[0] == 'SHOWSEARCHLABELS': - command = 'SHOWSEARCHLABELS' - labels = self.parser.list_search_phrase_labels() - await websocket.send(json.dumps(labels)) - elif messagelist[0] == 'MATCH_DOCUMENTS': - command = 'MATCH_DOCUMENTS' - matches = self.parser.match() - await websocket.send(json.dumps(matches)) - elif messagelist[0] == 'FREQUENCIES': - command = 'FREQUENCIES' - freqinfo = self.parser.get_corpus_frequency_information() - await websocket.send(json.dumps(freqinfo)) - elif messagelist[0] == 'TOPIC_MATCHES': - command = 'TOPIC_MATCHES' - text_to_match = messagelist[1] - # This search takes a long list of keyword parameters, - # all of them with preset default thresholds. TBD: - # expose all of these parameters in more complex topic - # match functionality. Holmes extractor documentation - # describes what each of these parameters involves. 
- matches = self.parser.topic_match_documents_against( - text_to_match, - word_embedding_match_threshold=.42, - relation_score=20, - reverse_only_relation_score=15, - single_word_score=10, - single_word_any_tag_score=5, - different_match_cutoff_score=10, - relation_matching_frequency_threshold=0.0, - embedding_matching_frequency_threshold=0.0, - use_frequency_factor=True) - await websocket.send(json.dumps(matches)) - # Holmes Extractor also has supervised topic model - # building facilities using the functions - # get_supervised_topic_training_basis(), - # and deserialize_supervised_topic_classifier(). - # TBD: Add support for Holmes supervised topic model - # building. - elif messagelist[0] == 'AWE_INFO': - label = messagelist[1] - doc = self.parser.get_document(label) - indic = None - itype = None - summ = None - filt = None - if len(messagelist) == 3: - indic = messagelist[2] - await websocket.send( - doc._.AWE_Info(indicator=indic)) - elif len(messagelist) == 4: - indic = messagelist[2] - itype = messagelist[3] - await websocket.send( - doc._.AWE_Info(indicator=indic,infoType=itype)) - elif len(messagelist) == 5: - indic = messagelist[2] - itype = messagelist[3] - summ = messagelist[4] - result = \ - doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ) - if type(result) in [int, float, bool]: - await websocket.send(str(result)) - else: - await websocket.send(result) - - elif len(messagelist) == 6: - indic = messagelist[2] - itype = messagelist[3] - summ = messagelist[4] - filt = json.loads(messagelist[5]) - result = \ - doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt) - if type(result) in [int, float]: - await websocket.send(str(result)) - else: - await websocket.send(result) - elif len(messagelist) == 7: - indic = messagelist[2] - itype = messagelist[3] - summ = messagelist[4] - filt = json.loads(messagelist[5]) - trans = json.loads(messagelist[6]) - result = \ - 
doc._.AWE_Info(indicator=indic,infoType=itype,summaryType=summ,filters=filt,transformations=trans) - if type(result) in [int, float]: - await websocket.send(str(result)) - else: - await websocket.send(result) - - else: - await websocket.send(json.dumps([])) - elif messagelist[0] == 'DOCTOKENS': - label = messagelist[1] - doc = self.parser.get_document(label) - if doc is not None: - await websocket.send( - doc._.AWE_Info(indicator='text')) - else: - await websocket.send(json.dumps([])) - elif messagelist[0] == 'DOCTOKENS_WITH_WS': - label = messagelist[1] - doc = self.parser.get_document(label) - if doc is not None: - await websocket.send( - doc._.AWE_Info(indicator='text_with_ws')) - else: - await websocket.send(json.dumps([])) - elif messagelist[0] == 'DOCHEADS': - command = 'DOCHEADS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - heads = [token.head.i for token in doc] - await websocket.send(json.dumps(heads)) - elif messagelist[0] == 'POS': - command = 'POS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - heads = [token.pos_ for token in doc] - await websocket.send(json.dumps(heads)) - elif messagelist[0] == 'DOCDEPENDENCIES': - command = 'DOCDEPENDENCIES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - deps = [token.dep_ for token in doc] - await websocket.send(json.dumps(deps)) - elif messagelist[0] == 'DOCENTITIES': - command = 'DOCENTITIES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - ents = [[ent.text, - ent.start_char, - ent.end_char, - ent.label_] for ent in doc.ents] - await websocket.send(json.dumps(ents)) - elif messagelist[0] == 'TOKVECS': - command = 'TOKVECS' - # List returned contains lists 
pairing token - # offset with token vectors cast as strings - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.token_vectors)) - elif messagelist[0] == 'LEMMAS': - command = 'LEMMAS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(indicator='lemma_') - ) - elif messagelist[0] == 'STOPWORDS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(indicator='is_stop') - ) - elif messagelist[0] == 'WORDTYPES': - command = 'WORDTYPES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='lower_',filters=[('is_alpha', ['True']),('is_stop', ['False'])],summaryType = 'uniq') - )) - elif messagelist[0] == 'ROOTS': - command = 'ROOTS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root') - )) - elif messagelist[0] == 'SYLLABLES': - command = 'SYLLABLES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSyll'))) - elif messagelist[0] == 'WORDLENGTH': - command = 'WORDLENGTH' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt']) - )) - elif messagelist[0] == 'LATINATES': - command = 'LATINATES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'ACADEMICS': - command = 'ACADEMICS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 
'SENSENUMS': - command = 'SENSENUMS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'LOGSENSENUMS': - command = 'LOGSENSENUMS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nSenses',filters=[('is_alpha', ['True'])],transformations=['log']) - )) - elif messagelist[0] == 'MORPHOLOGY': - command = 'MORPHOLOGY' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='morphology') - )) - elif messagelist[0] == 'MORPHNUMS': - command = 'MORPHNUMS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='nMorph',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'HALROOTFREQS': - command = 'HALROOTFREQS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'HALLOGROOTFREQS': - command = 'HALLOGROOTFREQS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log']) - )) - elif messagelist[0] == 'ROOTFAMSIZES': - command = 'ROOTFAMSIZES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'ROOTPFMFS': - command = 'ROOTPFMFS' - # Position in the list returned equals position - # in the document - label = 
messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'FAMILYSIZES': - command = 'FAMILYSIZES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='family_size',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'TOKFREQS': - command = 'TOKFREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])]) - )) - elif messagelist[0] == 'LEMMAFREQS': - command = 'LEMMAfREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='lemma_freq'))) - elif messagelist[0] == 'ROOTFREQS': - command = 'ROOTFREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='root_Freq'))) - elif messagelist[0] == 'MAXFREQS': - command = 'MAXFREQS' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='max_freq'))) - elif messagelist[0] == 'CONCRETES': - command = 'CONCRETES' - # Position in the list returned equals position - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='concreteness'))) - elif messagelist[0] == 'ABSTRACTTRAITS': - command = 'ABSTRACTTRAITS' - # Position in the list returned 
equals position - # in the document. Flag 1 if the word names an - # abstract trait, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='abstract_trait'))) - elif messagelist[0] == 'ANIMATES': - command = 'ANIMATES' - # Position in the list returned equals position - # in the document. Flag 1 if the word names an animate - # entity, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='animate'))) - elif messagelist[0] == 'LOCATIONS': - command = 'LOCATIONS' - # Position in the list returned equals position - # in the document. Flag 1 if the word names an - # animate entity, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='location'))) - elif messagelist[0] == 'DEICTICS': - command = 'DEICTICS' - # Position in the list returned equals position in - # the document. 
Flag 1 if the word names a deictic - # element, 0 otherwise - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='deictic'))) - elif messagelist[0] == 'PARAGRAPHS': - command = 'PARAGRAPHS' - # Items in the list indicate word offsets in the document - # at which paragraph breaks appear - label = messagelist[1] - doc = self.parser.get_document(label) - - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='delimiter_n') - )) - # doc._.paragraph_breaks)) - elif messagelist[0] == 'SENTENCES': - command = 'SENTENCES' - # Items in the list indicate word offsets in the document - # at which paragraph breaks appear - label = messagelist[1] - doc = self.parser.get_document(label) - - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='sents') - )) - #await websocket.send(json.dumps( - # [(sent.start, sent.end) for sent in doc.sents])) - elif messagelist[0] == 'PARAGRAPHLENS': - command = 'PARAGRAPHLENS' - # Items in the list indicate lengths of paragraphs listed - # by offset in GETPARAGRAPHS - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen']) - )) - elif messagelist[0] == 'TRANSITIONPROFILE': - command = 'TRANSITIONPROFILE' - # Returns a rich data structure in a list containing - # (1) total number of transition words in the document - # (2) a dictionary that lists the frequency of a predefined - # set of transition word categories. - # (3) a dictionary that lists the frequency of individual - # transition words - # (4) a list of lists that provides for each transition - # word the word string, its start and stop offsets, - # and its transition word category. 
- label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.transition_word_profile)) - elif messagelist[0] == 'TRANSITIONS': - command = 'TRANSITIONS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='transitions') - )) - elif messagelist[0] == 'TRANSITIONDISTANCES': - command = 'TRANSITIONDISTANCES' - # List of cosine distances between ten-word windows - # before and after a transition - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='transition_distances') - )) - elif messagelist[0] == 'SENTENCECOHESIONS': - command = 'SENTENCECOHESIONS' - # List of cosine distances between ten-word windows - # before and after a sentence boundary - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='intersentence_cohesions') - )) - elif messagelist[0] == 'SLIDERCOHESIONS': - command = 'SLIDERCOHESIONS' - # List of cosine distances between ten-word windows - # before and after a sliding window through the text - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='sliding_window_cohesions') - )) - elif messagelist[0] == 'COREFCHAINS': - command = 'COREFCHAINS' - # List of coreference chains found in document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.coref_chains)) - elif messagelist[0] == 'RHEMEDEPTHS': - command = 'RHEMEDEPTHS' - # Syntactic depth of the sentence rheme -- part of - # sentence after the main verb where new information - # is usually placed - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - 
doc._.AWE_Info(infoType='Doc',indicator='syntacticDepthsOfRhemes') - )) - elif messagelist[0] == 'THEMEDEPTHS': - command = 'THEMEDEPTHS' - # Syntactic depth of the sentence theme -- part - # of sentence before the main verb where given - # information is usually placed - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='syntacticDepthsOfThemes') - )) - elif messagelist[0] == 'WEIGHTEDDEPTHS': - command = 'WEIGHTEDDEPTHS' - # Syntactic depth weighted to penalize - # left-embedded structures - # that tend to be harder to process - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='weightedSyntacticDepths') - )) - elif messagelist[0] == 'WEIGHTEDBREADTHS': - command = 'WEIGHTEDBREADTHS' - # Syntactic breadth -- measure of extent to which sentence - # structure is additive, consisting of coordinated - # structures and loosely appended modifiers typical of - # spoken, often unplanned sentence production - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='weightedSyntacticBreadths') - )) - elif messagelist[0] == 'SENTENCETYPES': - # tuple giving number and location of sentence types - # format: - # (1,1,1,1,[1,2,3,4]) would be the record for a text that - # had four sentences -- simple sentence, compound sentence, - # complex sentence, and compound/complex sentence, in - # that order. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType='Doc',indicator='sentence_types') - )) - elif messagelist[0] == 'SYNTACTICPROFILE': - command = 'SYNTACTICPROFILE' - # Returns a dictionary containing frequency information - # about the syntactic relations and categories in the text. 
- # This includes information about the frequency of parts - # of speech, morphological categories, and syntactic - # dependencies between specific parts of speech. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.syntacticProfile)) - elif messagelist[0] == 'NORMEDSYNTACTICPROFILE': - command = 'NORMEDSYNTACTICPROFILE' - # Returns a dictionary containing normalized - # frequency information (proportionas) for the - # syntactic relations and categories in the text. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.syntacticProfileNormed)) - elif messagelist[0] == 'QUOTEDTEXT': - command = 'QUOTEDTEXT' - # 1 for tokens within quotation marks, 0 for other text - # Position in the list corresponds to offset of token - # in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_quoted') - )) - elif messagelist[0] == 'DIRECTSPEECHSPANS': - command = 'DIRECTSPEECHSPANS' - # Data about subset of quoted text -- specifically, - # quoted text that is attributed to a specific - # speaker. - # - # Returns a list of lists with three top level - # elements: - # - # 1. Speaker: a list of offsets to tokens - # referring to the speaker(s) - # 2. Addressee: a list of offsets to tokens - # referring to the person(s) spoken to. - # 3. Span start offset - # 4. Span end offset. - # - # Note that first and second person pronouns - # inside direct speech may reference a person - # explicitly identified in the direct speech - # framing text. Coreferee reference resolution - # may apply, so that the speaker and addressee - # references may be to a proper noun at the head - # of a pronominal reference chain that includes - # the direct speech frame. 
- label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech') - )) - elif messagelist[0] == 'IN_DIRECT_SPEECH': - # 1 for tokens within quoted stretches of direct speech, - # 0 for other text. Position in the list corresponds to - # offset of token in the document - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_in_direct_speech') - )) - elif messagelist[0] == 'TENSECHANGES': - # list of positions where tense changed in the main - # document flow (not in direct speech/quotations, - # with flag to indicate whether shift was to past - # tense or to present tense. - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.vwp_tense_changes)) - elif messagelist[0] == 'PERSPECTIVES': - # list of positions where perspective is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_perspective') - )) - elif messagelist[0] == 'ATTRIBUTIONS': - # list of positions where attribution is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_attribution') - )) - elif messagelist[0] == 'SOURCES': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_source') - )) - elif messagelist[0] == 'CITES': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_cite') - )) - elif messagelist[0] == 'STATEMENTSOFFACT': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await 
websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_fact') - )) - elif messagelist[0] == 'STATEMENTSOFOPINION': - # list of positions where source is indicated - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_statements_of_opinion') - )) - elif messagelist[0] == 'PERSPECTIVESPANS': - command = 'PERSPECTIVESPANS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.vwp_perspective_spans)) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='vwp_perspective_spans') - ) - elif messagelist[0] == 'STANCEMARKERS': - command = 'STANCEMARKERS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.vwp_stance_markers)) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='vwp_stance_markers') - ) - - elif messagelist[0] == 'CLAIMTEXTS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_claim') - )) - - elif messagelist[0] == 'DISCUSSIONTEXTS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_discussion') - )) - - elif messagelist[0] == 'EMOTIONWORDS': - command = 'EMOTIONWORDS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_emotionword') - )) - - elif messagelist[0] == 'CHARACTERWORDS': - command = 'CHARACTERWORDS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_character_traits') - )) - - elif messagelist[0] == 'EMOTIONALSTATES': - command = 'EMOTIONALSTATES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - 
doc._.AWE_Info(infoType="Doc",indicator='vwp_emotion_states') - ) - elif messagelist[0] == 'CHARACTERTRAITS': - command = 'CHARACTERTRAITS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.vwp_character_traits)) - elif messagelist[0] == 'PROPOSITIONALATTITUDES': - command = 'PROPOSITIONALATTITUDES' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_propositional_attitudes') - )) - elif messagelist[0] == 'SOCIAL_AWARENESS': - command = 'SOCIAL_AWARENESS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(infoType="Doc",indicator='vwp_social_awareness') - )) - elif messagelist[0] == 'CONCRETEDETAILS': - command = 'CONCRETEDETAILS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(indicator='concrete_detail') - ) - elif messagelist[0] == 'INTERACTIVELANGUAGE': - command = 'INTERACTIVELANGUAGE' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_interactive') - )) - elif messagelist[0] == 'ARGUMENTWORDS': - command = 'ARGUMENTWORDS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_argumentword') - )) - elif messagelist[0] == 'ARGUMENTLANGUAGE': - command = 'ARGUMENTLANGUAGE' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_argumentation') - )) - elif messagelist[0] == 'EXPLICITARGUMENTWORDS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_explicit_argument') - )) - elif messagelist[0] == 'SUBJECTIVITYRATINGS': - command = 'SUBJECTIVITYRATINGS' - label = messagelist[1] - 
doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='subjectivity') - )) - elif messagelist[0] == 'SENTIMENTRATINGS': - command = 'SENTIMENTRATINGS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_sentiment') - )) - elif messagelist[0] == 'TONERATINGS': - command = 'TONERATINGS2' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='vwp_tone') - )) - elif messagelist[0] == 'POLARITYRATINGS': - command = 'POLARITYRATINGS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='polarity') - )) - elif messagelist[0] == 'ASSESSMENTS': - command = 'ASSESSMENTS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.assessments)) - elif messagelist[0] == 'PASTTENSESCOPE': - command = 'PASTTENSESCOPE' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='in_past_tense_scope') - )) - elif messagelist[0] == 'GOVERNINGSUBJECTS': - command = 'GOVERNINGSUBJECTS' - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='governing_subject') - )) - elif messagelist[0] == 'CLUSTERS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='clusterID') - )) - elif messagelist[0] == 'PROMPTLANGUAGE': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.prompt_language)) - elif messagelist[0] == 'PROMPTRELATED': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.prompt_related)) - elif messagelist[0] == 'MAINIDEAS': - label = messagelist[1] - doc 
= self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='main_ideas') - ) - elif messagelist[0] == 'SUPPORTINGIDEAS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='supporting_ideas') - ) - elif messagelist[0] == 'SUPPORTINGDETAILS': - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send( - doc._.AWE_Info(infoType="Doc",indicator='supporting_details') - ) - elif messagelist[0] == 'CLUSTERINFO': - command = 'CLUSTERINFO' - # Get the local word clusters our algorithm has - # clustered the words of the student document into - # - # The data is a list of records in this format: - # 1. The clusterID. - # 2. The cluster rating, which is roughly a measure - # of how important the cluster seems to be in the - # docyument as measured by the number of words in it - # and their relative infrequency - # 3. A list of the actual word strings in each cluster - # 4. 
The offsets of the words assigned to each cluster - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.clusterInfo)) - elif messagelist[0] == 'DEVWORDS': - command = 'DEVWORDS' - # offset of the logical subject that governs - # the domain this token belongs to - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps( - doc._.AWE_Info(indicator='devword') - )) - elif messagelist[0] == 'NOMINALREFERENCES': - command = 'NOMINALREFERENCES' - # offset of the logical subject that governs - # the domain this token belongs to - label = messagelist[1] - doc = self.parser.get_document(label) - await websocket.send(json.dumps(doc._.nominalReferences)) - elif messagelist[0] == 'DOCSUMMARYLABELS': - command = 'DOCSUMMARYLABELS' - await websocket.send(json.dumps(self.summaryLabels)) - elif messagelist[0] == 'DOCSUMMARYFEATS': - command = 'DOCSUMMARYFEATS' - label = messagelist[1] - doc = self.parser.get_document(label) - summaryFeats = [ - doc._.AWE_Info(indicator='nSyll',summaryType="mean"), - doc._.AWE_Info(indicator='nSyll',summaryType="median"), - doc._.AWE_Info(indicator='nSyll',summaryType="max"), - doc._.AWE_Info(indicator='nSyll',summaryType="min"), - doc._.AWE_Info(indicator='nSyll',summaryType="stdev"), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='mean'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='median'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='max'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='min'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True'])], transformations=['len', 'sqrt'], summaryType='stdev'), - doc._.AWE_Info(indicator='is_latinate',filters=[('is_alpha', 
['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='is_academic',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='family_size', filters=[('is_alpha', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='mean'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='median'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='max'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='min'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True'])], transformations=['log'], summaryType='stdev'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='max'), - 
doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='mean'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='median'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='max'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='min'), - doc._.AWE_Info(indicator='min_root_freq',filters=[('is_alpha', ['True'])],transformations=['log'],summaryType='stdev'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='max'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='root_famSize',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='max'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='root_pfmf',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', 
['True'])],summaryType='max'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='token_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='mean'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='median'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='max'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='min'), - doc._.AWE_Info(indicator='lemma_freq',filters=[('is_alpha', ['True'])],summaryType='stdev'), - doc._.AWE_Info(indicator='max_freq',summaryType='mean'), - doc._.AWE_Info(indicator='max_freq',summaryType='median'), - doc._.AWE_Info(indicator='max_freq',summaryType='max'), - doc._.AWE_Info(indicator='max_freq',summaryType='min'), - doc._.AWE_Info(indicator='max_freq',summaryType='stdev'), - doc._.AWE_Info(indicator='abstract_trait',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='animate',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='deictic',filters=[('is_alpha', ['True'])], summaryType="proportion"), - doc._.AWE_Info(indicator='root', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(indicator='lemma_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(indicator='lower_', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(indicator='text', filters=[('is_alpha', ['True']),('is_stop', ['False']),('pos_', content_pos)], summaryType = 'total'), - doc._.AWE_Info(infoType="Doc",indicator='delimiter_n',summaryType='total'), - 
doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['tokenlen'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='proportion'), - doc._.AWE_Info(infoType="Doc",indicator='transitions',summaryType='total'), - doc._.AWE_Info(infoType="Doc",indicator='transitions',transformations=['text'],summaryType='counts'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='transition_distances',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='intersentence_cohesions',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='max'), - 
doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sliding_window_cohesions',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',summaryType='counts'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='corefChainInfo',transformations=['len'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='sents',summaryType='counts'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sents',transformations=['len'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='sentenceThemes',transformations=['tokenlen'],summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='mean'), - 
doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfRhemes',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='syntacticDepthsOfThemes',summaryType='stdev'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='mean'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='median'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='max'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='min'), - doc._.AWE_Info(indicator='weightedSyntacticDepth',summaryType='stdev'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='mean'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='median'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='max'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='min'), - doc._.AWE_Info(indicator='weightedSyntacticBreadth',summaryType='stdev'), - doc._.syntacticVariety, - doc._.AWE_Info(indicator='in_past_tense_scope',summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_argumentation',summaryType='proportion'), - doc._.AWE_Info(infoType="Doc",indicator='vwp_direct_speech',summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_egocentric',summaryType='proportion'), - doc._.AWE_Info(indicator='vwp_allocentric',summaryType='proportion'), - 
doc._.AWE_Info(indicator='subjectivity',summaryType='mean'), - doc._.AWE_Info(indicator='subjectivity',summaryType='median'), - doc._.AWE_Info(indicator='subjectivity',summaryType='min'), - doc._.AWE_Info(indicator='subjectivity',summaryType='max'), - doc._.AWE_Info(indicator='subjectivity',summaryType='stdev'), - doc._.AWE_Info(indicator='polarity',summaryType='mean'), - doc._.AWE_Info(indicator='polarity',summaryType='median'), - doc._.AWE_Info(indicator='polarity',summaryType='min'), - doc._.AWE_Info(indicator='polarity',summaryType='max'), - doc._.AWE_Info(indicator='polarity',summaryType='stdev'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='mean'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='median'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='min'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='max'), - doc._.AWE_Info(indicator='vwp_sentiment',summaryType='stdev'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='mean'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='median'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='min'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='max'), - doc._.AWE_Info(infoType="Doc",indicator='main_cluster_spans',transformations=['len'],summaryType='stdev'), - doc._.AWE_Info(indicator='devword', summaryType='proportion'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - 
doc._.AWE_Info(indicator='nSyll', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='nMorph', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='nSenses', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='token_freq', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='mean'), - 
doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='median'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='min'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='max'), - doc._.AWE_Info(indicator='concreteness', filters=[('is_alpha', ['True']),('devword', ['True'])], summaryType='stdev') - ] - await websocket.send(json.dumps(summaryFeats)) - else: - await websocket.send(False) - -# --- [ MAIN ] -------------------------------------------------------------------------- - -if __name__ == '__main__': - print('parser server loading') - wsc = parserServer() - -# --- [ END ] --------------------------------------------------------------------------- diff --git a/setup.cfg b/setup.cfg index 421d681..79d4a3c 100755 --- a/setup.cfg +++ b/setup.cfg @@ -29,9 +29,9 @@ include_package_data = True packages = find: python_requires = >=3.9 install_requires = - awe_languagetool @ git+https://github.com/ArgLab/AWE_LanguageTool.git + awe_languagetool @ git+https://github.com/ArgLab/AWE_LanguageTool.git@default_config awe_spellcorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git - awe_components @ git+https://github.com/ArgLab/AWE_Components.git + awe_components @ git+https://github.com/ArgLab/AWE_Components.git@numpy_version websockets websocket-client aiohttp From 77df65dc572f0b0627c599f54183f41e2bbae20c Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Thu, 14 Nov 2024 21:04:48 -0500 Subject: [PATCH 38/39] Update setup.cfg removed branch designations --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 79d4a3c..421d681 100755 --- a/setup.cfg +++ b/setup.cfg @@ -29,9 +29,9 @@ include_package_data = True packages = find: python_requires = >=3.9 install_requires = - awe_languagetool @ 
git+https://github.com/ArgLab/AWE_LanguageTool.git@default_config + awe_languagetool @ git+https://github.com/ArgLab/AWE_LanguageTool.git awe_spellcorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git - awe_components @ git+https://github.com/ArgLab/AWE_Components.git@numpy_version + awe_components @ git+https://github.com/ArgLab/AWE_Components.git websockets websocket-client aiohttp From 2676cad22ecd4ad94d34458a57f3cb4590d18540 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 19 Dec 2024 11:06:41 -0500 Subject: [PATCH 39/39] Removed requirements in favor of setup.cfg --- requirements.txt | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index f292564..0000000 --- a/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -awe_languagetool @ git+https://github.com/ArgLab/AWE_LanguageTool.git -awe_spellcorrect @ git+https://github.com/ArgLab/AWE_SpellCorrect.git -awe_components @ git+https://github.com/ArgLab/AWE_Components.git -websockets -websocket-client -aiohttp -requests -streamlit -st-annotated-text -names -openpyxl -tensorflow