Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions docs/source/cli_tools.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
SinaTools Command Line
=======================
.. toctree::
:maxdepth: 2
:titlesonly:
SinaTools Command Line
=======================

All command-line tools are available through the unified ``sinatools`` entrypoint::

sinatools <command> [options]

.. toctree::
:maxdepth: 2
:titlesonly:
:caption: Modules:


Expand Down
54 changes: 7 additions & 47 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,53 +37,13 @@
]


setup(
entry_points={
'console_scripts':[
('install_env='
'sinatools.install_env:main'),
('arStrip='
'sinatools.CLI.utils.arStrip:main'),
('jaccard_similarity='
'sinatools.CLI.utils.jaccard:main'),
('implication='
'sinatools.CLI.utils.implication:main'),
('sentence_tokenizer='
'sinatools.CLI.utils.sentence_tokenizer:main'),
('transliterate='
'sinatools.CLI.utils.text_transliteration:main'),
('morphology_analyzer='
'sinatools.CLI.morphology.morph_analyzer:main'),
('alma_multi_word='
'sinatools.CLI.morphology.ALMA_multi_word:main'),
('entity_extractor='
'sinatools.CLI.ner.entity_extractor:main'),
('remove_punctuation='
'sinatools.CLI.utils.remove_punctuation:main'),
('remove_latin='
'sinatools.CLI.utils.remove_latin:main'),
('wsd='
'sinatools.CLI.wsd.disambiguator:main'),
('corpus_tokenizer='
'sinatools.CLI.utils.corpus_tokenizer:main'),
('appdatadir='
'sinatools.CLI.DataDownload.get_appdatadir:main'),
('download_files='
'sinatools.CLI.DataDownload.download_files:main'),
('corpus_entity_extractor='
'sinatools.CLI.ner.corpus_entity_extractor:main'),
('text_dublication_detector='
'sinatools.CLI.utils.text_dublication_detector:main'),
('evaluate_synonyms='
'sinatools.CLI.synonyms.evaluate_synonyms:main'),
('extend_synonyms='
'sinatools.CLI.synonyms.extend_synonyms:main'),
('semantic_relatedness='
'sinatools.CLI.semantic_relatedness.compute_relatedness:main'),
('relation_extractor='
'sinatools.CLI.relations.relation_extractor:main'),
],
},
setup(
entry_points={
'console_scripts':[
('sinatools='
'sinatools.cli:main'),
],
},
data_files=[('sinatools', ['sinatools/environment.yml'])],
package_data={'sinatools': ['data/*.pickle', 'environment.yml']},
install_requires=requirements,
Expand Down
Empty file.
15 changes: 8 additions & 7 deletions sinatools/CLI/DataDownload/download_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,23 @@
"""

import argparse
from sinatools.DataDownload.downloader import download_file
from sinatools.DataDownload.downloader import download_files
from sinatools.DataDownload.downloader import get_appdatadir
from sinatools.DataDownload.downloader import download_folder_from_hf
from sinatools.DataDownload.downloader import urls


def main():
def main(argv=None):
from sinatools.DataDownload.downloader import download_file
from sinatools.DataDownload.downloader import download_files
from sinatools.DataDownload.downloader import download_folder_from_hf
from sinatools.DataDownload.downloader import get_appdatadir
from sinatools.DataDownload.downloader import urls

parser = argparse.ArgumentParser(description="Download files from specified URLs.")
parser.add_argument('-f', '--files', nargs="*",
help="Names of the files to download. Available files are: "
f"{', '.join(urls.keys())}. If no file is specified, all files will be downloaded.")

get_appdatadir()

args = parser.parse_args()
args = parser.parse_args(argv)

if args.files:
all_files = args.files
Expand Down
8 changes: 8 additions & 0 deletions sinatools/CLI/DataDownload/get_appdatadir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def main(argv=None):
    """CLI entry point: print the SinaTools application data directory.

    The *argv* parameter is accepted for interface consistency with the
    other CLI subcommands; this command takes no arguments.
    """
    # Imported lazily so the CLI dispatcher stays fast to load.
    from sinatools.DataDownload.downloader import get_appdatadir

    app_dir = get_appdatadir()
    print(app_dir)


if __name__ == "__main__":
    main()
Empty file added sinatools/CLI/__init__.py
Empty file.
9 changes: 5 additions & 4 deletions sinatools/CLI/morphology/ALMA_multi_word.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import argparse
from sinatools.morphology.ALMA_multi_word import ALMA_multi_word
import json
from sinatools.utils.readfile import read_file

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='Multi-Word Analysis using SinaTools')

# Adding arguments for the multi-word input or file containing the multi-word input
parser.add_argument('--multi_word', type=str, help='Multi-word text to be analyzed')
parser.add_argument('--file', type=str, help='File containing the multi-word text to be analyzed')

args = parser.parse_args()
args = parser.parse_args(argv)

from sinatools.morphology.ALMA_multi_word import ALMA_multi_word
from sinatools.utils.readfile import read_file

if args.multi_word is None and args.file is None:
print("Error: Either --multi_word or --file argument must be provided.")
Expand Down
Empty file.
9 changes: 5 additions & 4 deletions sinatools/CLI/morphology/morph_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@
"""

import argparse
from sinatools.morphology.morph_analyzer import analyze
from sinatools.utils.readfile import read_file

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='Morphological Analysis using SinaTools')

parser.add_argument('--text', type=str, help='Text to be morphologically analyzed')
Expand All @@ -60,7 +58,10 @@ def main():
parser.add_argument('--task', type=str, default='full', choices=['lemmatization', 'pos', 'root', 'full'], help='Task for the result filter [lemmatization, pos, root, full] (default: full)')
parser.add_argument('--flag', type=str, default='1', choices=['1','*'], help='The flag to filter the returned results')

args = parser.parse_args()
args = parser.parse_args(argv)

from sinatools.morphology.morph_analyzer import analyze
from sinatools.utils.readfile import read_file

if args.text is None and args.file is None:
print("Error: Either --text or --file argument must be provided.")
Expand Down
Empty file added sinatools/CLI/ner/__init__.py
Empty file.
16 changes: 9 additions & 7 deletions sinatools/CLI/ner/corpus_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import os
import csv
from sinatools.utils.tokenizer import sentence_tokenizer
from sinatools.utils.tokenizers_words import simple_word_tokenize
import pandas as pd
import argparse
from sinatools.ner.entity_extractor import extract

"""
The following command takes a CSV file as input. It splits a specific column into tokens and tags them using named entity recognition (NER). It retains all other columns as they are, and it also adds sentences and tokens. Additionally, it assigns an auto-incrementing ID, a sentence ID, and a global sentence ID to each token. As follows:
Expand Down Expand Up @@ -33,11 +29,18 @@ def jsons_to_list_of_lists(json_list):
return [[d['token'], d['tags']] for d in json_list]

def combine_tags(sentence):
    """Run nested NER over *sentence* and return one tag string per token."""
    # Imported lazily so the heavy NER model loads only when needed.
    from sinatools.ner.entity_extractor import extract

    token_tag_pairs = jsons_to_list_of_lists(extract(sentence, "nested"))
    return [tags for _token, tags in token_tag_pairs]


def corpus_tokenizer(input_csv, output_csv, text_column, additional_columns, row_id, global_sentence_id):
import pandas as pd

from sinatools.utils.tokenizer import sentence_tokenizer
from sinatools.utils.tokenizers_words import simple_word_tokenize

print(input_csv, output_csv, text_column, additional_columns)
row_id = row_id - 1
global_sentence_id = global_sentence_id - 1
Expand Down Expand Up @@ -68,7 +71,7 @@ def corpus_tokenizer(input_csv, output_csv, text_column, additional_columns, row

writer.writerow(output_dic)

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description="CSV NER Tagging Tool")
parser.add_argument("--input_csv", help="Path to the input CSV file")
parser.add_argument("--text_column", required=True,
Expand All @@ -82,12 +85,11 @@ def main():
parser.add_argument("--global_sentence_id", default="1",
help="global_sentence_id to starts with")

args = parser.parse_args()
args = parser.parse_args(argv)
corpus_tokenizer(args.input_csv, args.output_csv, args.text_column, args.additional_columns, int(args.row_id), int(args.global_sentence_id))


if __name__ == "__main__":
main()



19 changes: 12 additions & 7 deletions sinatools/CLI/ner/entity_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,38 @@

import argparse
import json
import pandas as pd
from sinatools.ner.entity_extractor import extract
from sinatools.utils.tokenizer import corpus_tokenizer
from sinatools.utils.tokenizers_words import simple_word_tokenize

def jsons_to_list_of_lists(json_list):
    """Flatten NER result dicts into ``[token, tags]`` pairs.

    Each element of *json_list* must be a mapping with ``'token'`` and
    ``'tags'`` keys; the output preserves input order.
    """
    pairs = []
    for entry in json_list:
        pairs.append([entry['token'], entry['tags']])
    return pairs

def combine_tags(sentence):
    """Run nested NER over *sentence* and return one tag string per token."""
    # Imported inside the function to defer loading the NER model.
    from sinatools.ner.entity_extractor import extract

    token_tag_pairs = jsons_to_list_of_lists(extract(sentence, "nested"))
    return [tags for _token, tags in token_tag_pairs]


def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='NER Analysis using ArabiNER')

parser.add_argument('--text', type=str, help='Text to be analyzed for Named Entity Recognition')
parser.add_argument('--dir', type=str, help='dir containing the text files to be analyzed for Named Entity Recognition')
parser.add_argument('--output_csv', type=str, help='Output CSV file to write the results')

args = parser.parse_args()
args = parser.parse_args(argv)

from sinatools.ner.entity_extractor import extract

if args.text is not None:
results = extract(args.text)
# Print the results in JSON format
print(json.dumps(results, ensure_ascii=False, indent=4))
elif args.dir is not None:
import pandas as pd

from sinatools.utils.tokenizer import corpus_tokenizer
from sinatools.utils.tokenizers_words import simple_word_tokenize

corpus_tokenizer(args.dir, args.output_csv)
df = pd.read_csv(args.output_csv)
df['NER tags'] = None
Expand All @@ -88,4 +93,4 @@ def main():


if __name__ == '__main__':
main()
main()
Empty file.
15 changes: 10 additions & 5 deletions sinatools/CLI/relations/relation_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,27 @@
"""

import argparse
from sinatools.relations.relation_extractor import event_argument_relation_extraction
from sinatools.utils.readfile import read_file

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='Relation Extraction using SinaTools')

parser.add_argument('--text', type=str, help='The text from which events need to be extracted.')
parser.add_argument('--file', type=str, help='File containing the text from which events need to be extracted.')

args = parser.parse_args()
args = parser.parse_args(argv)

if args.text is None and args.file is None:
print("Error: Either --text or --file argument must be provided.")
return

input_text = args.text if args.text else " ".join(read_file(args.file))
if args.file:
from sinatools.utils.readfile import read_file

input_text = " ".join(read_file(args.file))
else:
input_text = args.text

from sinatools.relations.relation_extractor import event_argument_relation_extraction

results = event_argument_relation_extraction(input_text)

Expand Down
Empty file.
7 changes: 4 additions & 3 deletions sinatools/CLI/semantic_relatedness/compute_relatedness.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,22 @@
"""

import argparse
from sinatools.semantic_relatedness.compute_relatedness import get_similarity_score

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='Computes the degree of association between two sentences across various dimensions, meaning, underlying concepts, domain-specificity, topic overlap, viewpoint alignment.')

parser.add_argument('--sentence1', type=str, help='The first sentence to be compute similarity based on')
parser.add_argument('--sentence2', type=str, help='The second sentence to be compute similarity based on')


args = parser.parse_args()
args = parser.parse_args(argv)

if args.sentence1 is None and args.sentence2 is None:
print("Error: Either --sentence1 or --sentence2 argument must be provided.")
return

from sinatools.semantic_relatedness.compute_relatedness import get_similarity_score

score = get_similarity_score(args.sentence1, args.sentence2)

print(score)
Expand Down
Empty file.
7 changes: 4 additions & 3 deletions sinatools/CLI/synonyms/evaluate_synonyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,21 @@
"""

import argparse
from sinatools.synonyms.synonyms_generator import evaluate_synonyms

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='Morphological Analysis using SinaTools')

parser.add_argument('--synset', type=str, help='Set of synonyms seperated by |')
parser.add_argument('--level', type=int, help='The depth of edges the algorithm needs to reach')

args = parser.parse_args()
args = parser.parse_args(argv)

if args.synset is None and args.level is None:
print("Error: Either --synset or --level argument must be provided.")
return

from sinatools.synonyms.synonyms_generator import evaluate_synonyms

results = evaluate_synonyms(args.synset, args.level)

print(results)
Expand Down
7 changes: 4 additions & 3 deletions sinatools/CLI/synonyms/extend_synonyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,21 @@
"""

import argparse
from sinatools.synonyms.synonyms_generator import extend_synonyms

def main():
def main(argv=None):
parser = argparse.ArgumentParser(description='Morphological Analysis using SinaTools')

parser.add_argument('--synset', type=str, help='Set of synonyms seperated by |')
parser.add_argument('--level', type=int, help='The depth of edges the algorithm needs to reach')

args = parser.parse_args()
args = parser.parse_args(argv)

if args.synset is None and args.level is None:
print("Error: Either --synset or --level argument must be provided.")
return

from sinatools.synonyms.synonyms_generator import extend_synonyms

results = extend_synonyms(args.synset, args.level)

print(results)
Expand Down
Loading