selfteaching · liujiayi0042 · Aug 23, 2019 · Aug 20, 2019 · Aug 22, 2019
diff --git a/exercises/1901010103/d10/mymodule/main.py b/exercises/1901010103/d10/mymodule/main.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Print the most frequent words of the poems stored in tang300.json."""
+import json
+import sys
+from os import path
+
+# Resolve everything relative to this script instead of a machine-specific path.
+SCRIPT_DIR = path.dirname(path.abspath(__file__))
+sys.path.append(SCRIPT_DIR)
+
+from stats_word import stats_text
+
+# Read every poem's "contents" field and join once (no quadratic += loop).
+with open(path.join(SCRIPT_DIR, 'tang300.json'), 'r', encoding="utf-8") as f_poems:
+    all_poems = "".join(poem["contents"] for poem in json.load(f_poems))
+
+try:
+    en_result, cn_result = stats_text("", all_poems)
+    print("EN words result: ", en_result)
+    # Keep only words of two or more characters and show the first 20.
+    print("CN words result: ", [(word, count) for word, count in cn_result if len(word) >= 2][:20])
+except ValueError as e:
+    print("Exception catched.")
+    print(e)
diff --git a/exercises/1901010103/d10/mymodule/stats_word.py b/exercises/1901010103/d10/mymodule/stats_word.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+import string
+import jieba
+from collections import Counter
+
+def stats_text_en(en_text):
+    """Count whitespace-separated words in ``en_text``, ignoring punctuation.
+
+    Every character in ``string.punctuation`` is deleted before splitting.
+    Returns a list of ``(word, count)`` pairs, most frequent first.
+    Raises ``ValueError`` when ``en_text`` is not a ``str``.
+    """
+    if not isinstance(en_text, str):
+        raise ValueError("The method only accepts type str.")
+
+    # One translate pass replaces a separate replace() call per punctuation mark.
+    cleaned = en_text.translate(str.maketrans("", "", string.punctuation))
+    # Counter tallies the iterable directly; no manual increment loop needed.
+    return Counter(cleaned.split()).most_common()
+
+
+def stats_text_cn(cn_text):
+    """Segment ``cn_text`` with jieba and count how often each token occurs.
+
+    The text is passed to ``jieba.cut`` unchanged (accurate mode,
+    ``cut_all=False``); punctuation and newlines are not stripped here, so
+    they may appear among the counted tokens.
+
+    Returns a list of ``(token, count)`` pairs, most frequent first.
+    Raises ``ValueError`` when ``cn_text`` is not a ``str``.
+    """
+    if not isinstance(cn_text, str):
+        raise ValueError("The method only accepts type str.")
+
+    # The text to segment is the first argument of jieba.cut; Counter
+    # consumes the resulting token iterable directly.
+    tokens = jieba.cut(cn_text, cut_all=False)
+    return Counter(tokens).most_common()
+
+
+def stats_text(en_text, cn_text):
+    """Run both word counters; raise ValueError unless both arguments are str."""
+    if not (isinstance(en_text, str) and isinstance(cn_text, str)):
+        raise ValueError("The method only accepts type str.")
+    return stats_text_en(en_text), stats_text_cn(cn_text)
diff --git a/exercises/1901010103/d11/mymodule/main.py b/exercises/1901010103/d11/mymodule/main.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+"""Fetch a WeChat article, count its words and e-mail the result."""
+import getpass
+import sys
+from os import path
+
+import requests
+import yagmail
+from pyquery import PyQuery
+
+# Import stats_word from next to this script, not from a machine-specific path.
+sys.path.append(path.dirname(path.abspath(__file__)))
+from stats_word import stats_text
+
+# Download the WeChat article and extract its body text.
+content_url = "https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA"
+# A timeout keeps the script from hanging forever on a stalled connection.
+response = requests.get(content_url, timeout=30)
+# Fail loudly on an HTTP error instead of e-mailing stats of an error page.
+response.raise_for_status()
+html_code = response.text
+document = PyQuery(html_code)
+content = document("#js_content").text().replace("\n", "")
+
+try:
+    en_result, cn_result = stats_text("", content)
+    smtp_host = "smtp.sina.com"
+    sender = input("Please enter the sender's email address: ")
+    # getpass keeps the password from being echoed to the terminal.
+    password = getpass.getpass("Please enter the sender's email password: ")
+    recipient = input("Please enter the recipient's email address: ")
+    # The mail body is the string form of the (word, count) list.
+    yagmail.SMTP(user=sender, password=password, host=smtp_host).send(recipient, "Cutted words", str(cn_result))
+except ValueError as e:
+    print("Exception catched.")
+    print(e)
diff --git a/exercises/1901010103/d11/mymodule/stats_word.py b/exercises/1901010103/d11/mymodule/stats_word.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+
+import string
+import jieba
+from collections import Counter
+
+def stats_text_en(en_text):
+    """Count whitespace-separated words in ``en_text``, ignoring punctuation.
+
+    Every character in ``string.punctuation`` is deleted before splitting.
+    Returns a list of ``(word, count)`` pairs, most frequent first.
+    Raises ``ValueError`` when ``en_text`` is not a ``str``.
+    """
+    if not isinstance(en_text, str):
+        raise ValueError("The method only accepts type str.")
+
+    # One translate pass replaces a separate replace() call per punctuation mark.
+    cleaned = en_text.translate(str.maketrans("", "", string.punctuation))
+    # Counter tallies the iterable directly; no manual increment loop needed.
+    return Counter(cleaned.split()).most_common()
+
+
+def stats_text_cn(cn_text):
+    """Count the 100 most frequent jieba tokens in ``cn_text``.
+
+    Listed punctuation marks and newlines are removed before segmentation.
+    Returns at most 100 ``(token, count)`` pairs, most frequent first.
+    Raises ``ValueError`` when ``cn_text`` is not a ``str``.
+    """
+    if not isinstance(cn_text, str):
+        raise ValueError("The method only accepts type str.")
+
+    cn_special_words = "！“”#$%&‘’（）*+，-。/：；、……<=>？@[]「」《》^_`{|}~\n"
+    # Delete all listed characters in one pass instead of one replace() each.
+    cn_text = cn_text.translate(str.maketrans("", "", cn_special_words))
+
+    # The text to segment is the first argument of jieba.cut (accurate mode).
+    tokens = jieba.cut(cn_text, cut_all=False)
+    return Counter(tokens).most_common(100)
+
+
+def stats_text(en_text, cn_text):
+    """Run both word counters; raise ValueError unless both arguments are str."""
+    if not (isinstance(en_text, str) and isinstance(cn_text, str)):
+        raise ValueError("The method only accepts type str.")
+    return stats_text_en(en_text), stats_text_cn(cn_text)