From be25e07ff8c4df836b7adff305e2bd93453971ad Mon Sep 17 00:00:00 2001 From: internetrat7788 Date: Tue, 20 Aug 2019 23:21:45 +0800 Subject: [PATCH 1/2] =?UTF-8?q?[1901010103]=E8=87=AA=E5=AD=A6=E8=AE=AD?= =?UTF-8?q?=E7=BB=83=E8=90=A5=E5=AD=A6=E4=B9=A01=E7=BE=A4=20Day10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- exercises/1901010103/d10/mymodule/main.py | 24 ++++++++++ .../1901010103/d10/mymodule/stats_word.py | 46 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 exercises/1901010103/d10/mymodule/main.py create mode 100644 exercises/1901010103/d10/mymodule/stats_word.py diff --git a/exercises/1901010103/d10/mymodule/main.py b/exercises/1901010103/d10/mymodule/main.py new file mode 100644 index 000000000..a0bf5d3aa --- /dev/null +++ b/exercises/1901010103/d10/mymodule/main.py @@ -0,0 +1,24 @@ +#!/usr/bin/python +import sys +import json +sys.path.append('/Users/Yang/GitHub:PJ1/selfteaching-python-camp/exercises/1901010120/d08/mymodule/') + +from stats_word import stats_text +from os import path +import json + +file_path = path.join(path.dirname(path.abspath(__file__)),'./tang300.json') +with open(file_path,'r', encoding="utf-8") as f_poems: + poems_json = json.load(f_poems) + +all_poems = "" +for poems_info in poems_json: + all_poems += poems_info["contents"] + +try: + en_result, cn_result = stats_text("", all_poems) + print("EN words result: ", en_result) + print("CN words result: ", [(word,count) for word, count in cn_result if len(word) >= 2][:20]) +except ValueError as e: + print("Exception catched.") + print(e) \ No newline at end of file diff --git a/exercises/1901010103/d10/mymodule/stats_word.py b/exercises/1901010103/d10/mymodule/stats_word.py new file mode 100644 index 000000000..3dbab5192 --- /dev/null +++ b/exercises/1901010103/d10/mymodule/stats_word.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + +import string +import jieba +from collections import Counter + +def stats_text_en(en_text): + if not isinstance(en_text, str): + raise ValueError("The method only accepts type str.") + + for en_special_word in string.punctuation: + if en_special_word in en_text: + en_text = en_text.replace(en_special_word, "") + + en_word_counter = Counter() + en_text = en_text.split() + for en_word in en_text: + en_word_counter[en_word] += 1 + + return en_word_counter.most_common() + + +def stats_text_cn(cn_text): + if not isinstance(cn_text, str): + raise ValueError("The method only accepts type str.") + + #cn_special_words = "!“”#$%&‘’()*+,-。/:;、……<=>?@[]「」《》^_`{|}~\n" + #for cn_special_word in cn_special_words: + # if cn_special_word in cn_text: + # cn_text = cn_text.replace(cn_special_word, "") + + #把字符串给cut作为第一个参数 + cn_text = jieba.cut(cn_text, cut_all=False) + + cn_word_counter = Counter() + for cn_word in cn_text: + cn_word_counter[cn_word] += 1 + + return cn_word_counter.most_common() + + +def stats_text(en_text, cn_text): + if (not isinstance(en_text, str)) or (not isinstance(cn_text, str)): + raise ValueError("The method only accepts type str.") + + return stats_text_en(en_text), stats_text_cn(cn_text) From ef1df9c6e3363f4e7bf3bfc6da4ae7273afc6bbb Mon Sep 17 00:00:00 2001 From: internetrat7788 Date: Thu, 22 Aug 2019 23:53:57 +0800 Subject: [PATCH 2/2] =?UTF-8?q?[1901010103]=E8=87=AA=E5=AD=A6=E8=AE=AD?= =?UTF-8?q?=E7=BB=83=E8=90=A5=E5=AD=A6=E4=B9=A01=E7=BE=A4=20Day11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- exercises/1901010103/d11/mymodule/main.py | 36 +++++++++++++++ .../1901010103/d11/mymodule/stats_word.py | 46 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 exercises/1901010103/d11/mymodule/main.py create mode 100644 exercises/1901010103/d11/mymodule/stats_word.py diff --git a/exercises/1901010103/d11/mymodule/main.py b/exercises/1901010103/d11/mymodule/main.py new file mode 100644 index 000000000..65f56aec7 --- /dev/null +++ b/exercises/1901010103/d11/mymodule/main.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +import yagmail +import requests +import getpass +import sys +from pyquery import PyQuery +sys.path.append('/Users/Yang/GitHub:PJ1/selfteaching-python-camp/exercises/1901010120/d11/mymodule/') +from stats_word import stats_text +from os import path + +#提取微信地址和正文 +content_url = "https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA" +html_code = requests.get(content_url).text +document = PyQuery(html_code) +content = document("#js_content").text().replace("\n", "") + + +#file_path = path.join(path.dirname(path.abspath(__file__)),'./tang300.json') +#with open(file_path,'r', encoding="utf-8") as f_poems: +# poems_json = json.load(f_poems) + +#all_poems = "" +#for poems_info in poems_json: +# all_poems += poems_info["contents"] + +try: + en_result, cn_result = stats_text("",content) + #print cn_result + smtp_host = "smtp.sina.com" + sender = input("Please enter the sender's email address: ") + password = getpass.getpass("Please enter the sender's email password: ") + recipient = input("Please enter the recipient's email address: ") + yagmail.SMTP(user=sender, password=password, host=smtp_host).send(recipient, "Cutted words", str(cn_result)) +except ValueError as e: + print("Exception catched.") + print(e) \ No newline at end of file diff --git a/exercises/1901010103/d11/mymodule/stats_word.py b/exercises/1901010103/d11/mymodule/stats_word.py new file mode 100644 index 000000000..76e1fb165 --- /dev/null +++ b/exercises/1901010103/d11/mymodule/stats_word.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + +import string +import jieba +from collections import Counter + +def stats_text_en(en_text): + if not isinstance(en_text, str): + raise ValueError("The method only accepts type str.") + + for en_special_word in string.punctuation: + if en_special_word in en_text: + en_text = en_text.replace(en_special_word, "") + + en_word_counter = Counter() + en_text = en_text.split() + for en_word in en_text: + en_word_counter[en_word] += 1 + + return en_word_counter.most_common() + + +def stats_text_cn(cn_text): + if not isinstance(cn_text, str): + raise ValueError("The method only accepts type str.") + + cn_special_words = "!“”#$%&‘’()*+,-。/:;、……<=>?@[]「」《》^_`{|}~\n" + for cn_special_word in cn_special_words: + if cn_special_word in cn_text: + cn_text = cn_text.replace(cn_special_word, "") + + #把字符串给cut作为第一个参数 + cn_text = jieba.cut(cn_text, cut_all=False) + + cn_word_counter = Counter() + for cn_word in cn_text: + cn_word_counter[cn_word] += 1 + + return cn_word_counter.most_common(100) + + +def stats_text(en_text, cn_text): + if (not isinstance(en_text, str)) or (not isinstance(cn_text, str)): + raise ValueError("The method only accepts type str.") + + return stats_text_en(en_text), stats_text_cn(cn_text)