Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions exercises/1901010103/d10/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/python
import sys
import json
sys.path.append('/Users/Yang/GitHub:PJ1/selfteaching-python-camp/exercises/1901010120/d08/mymodule/')

from stats_word import stats_text
from os import path
import json

# Script body: load tang300.json from this script's own directory, join the
# "contents" field of every entry, and print word statistics for the result.
file_path = path.join(path.dirname(path.abspath(__file__)),'./tang300.json')
with open(file_path, 'r', encoding="utf-8") as f_poems:
    poems_json = json.load(f_poems)

# Build the combined text in one pass; str.join avoids re-copying the
# accumulated string on every iteration the way repeated `+=` can.
all_poems = "".join(poems_info["contents"] for poems_info in poems_json)

try:
    # The English slot is deliberately an empty string; only the second
    # argument carries text.
    en_result, cn_result = stats_text("", all_poems)
except ValueError as e:
    print("Exception catched.")
    print(e)
else:
    # Printing lives outside the guarded call so the handler above only
    # reports ValueErrors coming from stats_text itself.
    print("EN words result: ", en_result)
    # Keep tokens of at least two characters, then show the first 20 of them.
    print("CN words result: ", [(word, count) for word, count in cn_result if len(word) >= 2][:20])
46 changes: 46 additions & 0 deletions exercises/1901010103/d10/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/python

import string
import jieba
from collections import Counter

def stats_text_en(en_text):
    """Count the whitespace-separated words of ``en_text``.

    Every character listed in ``string.punctuation`` is deleted (not
    replaced by a space) before the text is split, and matching is
    case-sensitive.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent.

    Raises:
        ValueError: if ``en_text`` is not a ``str``.
    """
    if not isinstance(en_text, str):
        raise ValueError("The method only accepts type str.")

    # One translate pass deletes all ASCII punctuation characters.
    punctuation_remover = str.maketrans("", "", string.punctuation)
    words = en_text.translate(punctuation_remover).split()

    return Counter(words).most_common()


def stats_text_cn(cn_text):
    """Segment ``cn_text`` with jieba and count the resulting tokens.

    No characters are stripped before segmentation, so every token that
    ``jieba.cut`` yields is counted as-is.

    Returns:
        A list of ``(token, count)`` tuples ordered from most to least
        frequent.

    Raises:
        ValueError: if ``cn_text`` is not a ``str``.
    """
    if not isinstance(cn_text, str):
        raise ValueError("The method only accepts type str.")

    # The text is passed as the first argument to cut; cut_all is kept False.
    segments = jieba.cut(cn_text, cut_all=False)

    return Counter(segments).most_common()


def stats_text(en_text, cn_text):
    """Run the English and the Chinese word statistics in one call.

    Returns:
        A 2-tuple ``(stats_text_en(en_text), stats_text_cn(cn_text))``.

    Raises:
        ValueError: if either argument is not a ``str``.
    """
    if not (isinstance(en_text, str) and isinstance(cn_text, str)):
        raise ValueError("The method only accepts type str.")

    en_stats = stats_text_en(en_text)
    cn_stats = stats_text_cn(cn_text)
    return en_stats, cn_stats
36 changes: 36 additions & 0 deletions exercises/1901010103/d11/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/python
import yagmail
import requests
import getpass
import sys
from pyquery import PyQuery
sys.path.append('/Users/Yang/GitHub:PJ1/selfteaching-python-camp/exercises/1901010120/d11/mymodule/')
from stats_word import stats_text
from os import path

# Script body: download a WeChat article, extract its body text, compute
# word statistics, and e-mail the Chinese result.
content_url = "https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA"
# Without a timeout requests waits indefinitely on an unresponsive server;
# bound the wait so the script fails instead of hanging.
html_code = requests.get(content_url, timeout=30).text
document = PyQuery(html_code)
# Take the text of the element with id "js_content" and drop newlines.
content = document("#js_content").text().replace("\n", "")

try:
    # The English slot is deliberately an empty string; only the article
    # text is analysed.
    en_result, cn_result = stats_text("", content)
    smtp_host = "smtp.sina.com"
    sender = input("Please enter the sender's email address: ")
    # getpass keeps the password from being echoed to the terminal.
    password = getpass.getpass("Please enter the sender's email password: ")
    recipient = input("Please enter the recipient's email address: ")
    # The statistics list is sent as its plain string representation.
    yagmail.SMTP(user=sender, password=password, host=smtp_host).send(recipient, "Cutted words", str(cn_result))
except ValueError as e:
    print("Exception catched.")
    print(e)
46 changes: 46 additions & 0 deletions exercises/1901010103/d11/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/python

import string
import jieba
from collections import Counter

def stats_text_en(en_text):
    """Return word frequencies for an English text.

    Characters found in ``string.punctuation`` are removed outright, the
    remainder is split on whitespace, and each distinct (case-sensitive)
    word is counted.

    Returns:
        A list of ``(word, count)`` tuples, highest count first.

    Raises:
        ValueError: if ``en_text`` is not a ``str``.
    """
    if not isinstance(en_text, str):
        raise ValueError("The method only accepts type str.")

    # Keep only the characters that are not ASCII punctuation.
    kept_chars = [ch for ch in en_text if ch not in string.punctuation]
    word_list = "".join(kept_chars).split()

    frequencies = Counter(word_list)
    return frequencies.most_common()


def stats_text_cn(cn_text):
    """Strip listed special characters, segment with jieba, count tokens.

    Each character in the local ``cn_special_words`` string (including
    the newline) is deleted from the text before it is handed to
    ``jieba.cut``.

    Returns:
        A list of at most 100 ``(token, count)`` tuples, highest count
        first.

    Raises:
        ValueError: if ``cn_text`` is not a ``str``.
    """
    if not isinstance(cn_text, str):
        raise ValueError("The method only accepts type str.")

    cn_special_words = "!“”#$%&‘’()*+,-。/:;、……<=>?@[]「」《》^_`{|}~\n"
    # A single translate pass deletes every listed character.
    cleaned_text = cn_text.translate(str.maketrans("", "", cn_special_words))

    # The cleaned text is passed as the first argument to cut.
    tokens = jieba.cut(cleaned_text, cut_all=False)

    return Counter(tokens).most_common(100)


def stats_text(en_text, cn_text):
    """Compute English and Chinese word statistics together.

    Both arguments are type-checked up front, then ``stats_text_en`` is
    called on ``en_text`` and ``stats_text_cn`` on ``cn_text``.

    Returns:
        A 2-tuple ``(english_stats, chinese_stats)``.

    Raises:
        ValueError: if either argument is not a ``str``.
    """
    for text in (en_text, cn_text):
        if not isinstance(text, str):
            raise ValueError("The method only accepts type str.")

    return stats_text_en(en_text), stats_text_cn(cn_text)