-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper.py
More file actions
50 lines (43 loc) · 2.67 KB
/
scraper.py
File metadata and controls
50 lines (43 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from selenium import webdriver
from thesaurus import Word
from nltk.corpus import wordnet
# from selenium.webdriver import ActionChains
# import selenium.common.exceptions
class Driver:
    """Scrape random homophone pairs and synonyms through a Firefox WebDriver.

    The instance owns a browser session created in ``__init__``. Call
    ``close()`` when finished, or use the instance as a context manager,
    so the browser process is not left running.
    """

    def __init__(self):
        """Start a Firefox session and record the site URLs and XPath locators."""
        self.driver = webdriver.Firefox()
        self.homophone_site = "http://www.homophone.com/random"
        self.synonym_site = "http://www.thesaurus.com/browse/"
        # Absolute XPaths into the scraped pages; they depend on the exact page
        # layout. The key "scond_homoph_nounity" is misspelled but is kept
        # unchanged because ``xpaths`` is a public attribute.
        self.xpaths = {
            "homophs": "/html/body/div[3]/div[2]/div[1]/div[1]/h3",
            "first_homoph": "/html/body/div[3]/div[2]/div[2]/div[1]/div/div/span",
            "first_homoph_nounity": "/html/body/div[3]/div[2]/div[2]/div[1]/div/div/ol/li[1]/span",
            "second_homoph": "/html/body/div[3]/div[2]/div[2]/div[2]/div/div/span",
            "scond_homoph_nounity": "/html/body/div[3]/div[2]/div[2]/div[2]/div/div/ol/li[1]/span",
            "most_relevant_synonym": "/html/body/div[2]/div[2]/div[1]/div/div[3]/div[2]/div[2]/div[3]/div/ul[1]/li[1]/a/span[1]",
        }

    def __enter__(self):
        """Return this instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser on leaving the ``with`` block; never suppress errors."""
        self.close()

    def close(self):
        """Quit the underlying WebDriver session."""
        self.driver.quit()

    def _inner_html(self, xpath_key):
        """Return the ``innerHTML`` of the element located by ``xpaths[xpath_key]``."""
        # The generic ``find_element(by, value)`` form is used instead of
        # ``find_element_by_xpath``, which newer Selenium releases no longer
        # provide. "xpath" is the locator-strategy string for XPath lookups.
        element = self.driver.find_element("xpath", self.xpaths[xpath_key])
        return element.get_attribute("innerHTML")

    def get_random_homophones(self):
        """Load the random-homophone page and read one pair of words from it.

        Returns:
            A 4-tuple ``(first_word, first_word_nounity, second_word,
            second_word_nounity)`` of ``innerHTML`` values.
            NOTE(review): "nounity" is presumably a part-of-speech label
            shown next to each word; confirm against the live page.
        """
        self.driver.get(self.homophone_site)
        first_word = self._inner_html("first_homoph")
        first_word_nounity = self._inner_html("first_homoph_nounity")
        second_word = self._inner_html("second_homoph")
        second_word_nounity = self._inner_html("scond_homoph_nounity")
        return first_word, first_word_nounity, second_word, second_word_nounity

    def get_specific_random_homophones(self, nounity1, nounity2, max_attempts=None):
        """Fetch random pairs until one carries the two requested labels.

        The pair may match in either order; the result is arranged so that
        the word labelled ``nounity1`` comes first.

        Args:
            nounity1: Label wanted for the first returned word.
            nounity2: Label wanted for the second returned word.
            max_attempts: Maximum number of pages to try. ``None`` (the
                default) keeps trying indefinitely.

        Returns:
            A 4-tuple ``(word1, nounity1, word2, nounity2)``.

        Raises:
            LookupError: If ``max_attempts`` pages were tried without a match.
        """
        attempts = 0
        while max_attempts is None or attempts < max_attempts:
            attempts += 1
            first_word, first_word_nounity, second_word, second_word_nounity = (
                self.get_random_homophones()
            )
            print(first_word_nounity, second_word_nounity)
            if first_word_nounity == nounity1 and second_word_nounity == nounity2:
                return first_word, first_word_nounity, second_word, second_word_nounity
            if first_word_nounity == nounity2 and second_word_nounity == nounity1:
                # Matched in reverse order: swap so the nounity1 word leads.
                return second_word, second_word_nounity, first_word, first_word_nounity
        raise LookupError(
            "no homophone pair labelled (%r, %r) found in %d attempts"
            % (nounity1, nounity2, attempts)
        )

    def get_synonym(self, input_word):
        """Return the ``innerHTML`` of the element at the "most_relevant_synonym" XPath.

        Args:
            input_word: Word to look up; it is percent-encoded before being
                appended to ``synonym_site`` so spaces and other reserved
                characters yield a valid URL.
        """
        from urllib.parse import quote

        self.driver.get(self.synonym_site + quote(input_word))
        return self._inner_html("most_relevant_synonym")

    def get_synonyms(self, input_word):
        """Return ``Word(input_word).synonyms()`` from the ``thesaurus`` package."""
        return Word(input_word).synonyms()