From f6ce2c0d0d44eed9f5d7e8c14f81adf1f24c38f4 Mon Sep 17 00:00:00 2001
From: YuiLovezuru <754555035@qq.com>
Date: Tue, 10 Sep 2019 19:00:39 +0800
Subject: [PATCH 1/4] Create main.py

---
 exercises/1901100092/d11/main.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 exercises/1901100092/d11/main.py

diff --git a/exercises/1901100092/d11/main.py b/exercises/1901100092/d11/main.py
new file mode 100644
index 000000000..68078d0f8
--- /dev/null
+++ b/exercises/1901100092/d11/main.py
@@ -0,0 +1,42 @@
+from mymodule import stats_word
+from pyquery import PyQuery
+import requests
+import getpass
+import yagmail
+
+
+def load_file():
+    """Download the WeChat article and return the text of its content node.
+
+    Raises requests.RequestException on a network error, a timeout or a
+    non-2xx HTTP status.
+    """
+    # Without a timeout requests.get() can wait forever on a stalled server.
+    response = requests.get(
+        url='https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA', timeout=10)
+    # Fail loudly on 4xx/5xx instead of counting the words of an error page.
+    response.raise_for_status()
+
+    document = PyQuery(response.text)
+    # Text of the element whose id is "js_content".
+    return document('#js_content').text()
+
+
+# text=1  # verify that the argument type check works
+try:
+    i = stats_word.stats_text_cn(load_file())
+except ValueError:
+    print('stats_text参数应为字符串类型')
+else:
+    # Runs only when the statistics exist: after a ValueError `i` is unbound,
+    # and sending the mail anyway used to crash with NameError.
+    print(i)
+
+    sender = input('输入发件人邮箱:')
+    password_1 = getpass.getpass('输入发件人邮箱密码:')
+    recipients = input('输入收件人邮箱:')
+
+    yag = yagmail.SMTP(user=sender, password=password_1, host='smtp.qq.com')
+    yag.send(to=recipients,
+             subject="【1901100092】自学训练营学习15群DAY11 Yui",
+             contents=str(i))

From b9ce97edf95f1f93c217136ae507a8fa1eec5a7a Mon Sep 17 00:00:00 2001
From: YuiLovezuru <754555035@qq.com>
Date: Tue, 10 Sep 2019 19:00:53 +0800
Subject: [PATCH 2/4] Create stats_word.py

---
 .../1901100092/d11/mymodule/stats_word.py | 106 +++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 exercises/1901100092/d11/mymodule/stats_word.py

diff --git a/exercises/1901100092/d11/mymodule/stats_word.py b/exercises/1901100092/d11/mymodule/stats_word.py
new file mode 100644
index 000000000..79ce8bc9f
--- /dev/null
+++ b/exercises/1901100092/d11/mymodule/stats_word.py
@@ -0,0 +1,106 @@
+text = '''
+The Zen of Python, by Tim Peters
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambxiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!
+
+美丽总比丑陋好
+显式比隐式好
+简单总比复杂好
+复杂总比复杂好
+平的比嵌套的好
+稀疏总比密集好
+可读性
+特殊情况还不足以打破规则
+虽然实用性胜过纯洁性
+错误不应该悄无声息地过去
+除非显式地沉默
+面对敏锐,拒绝猜测的诱惑
+应该有一种——而且最好只有一种——显而易见的方法来做到这一点
+不过,除非你是荷兰人,否则这种方式一开始可能并不明显
+现在总比不做好
+虽然从来没有比“现在”更好
+如果实现很难解释,这是一个坏主意
+如果实现很容易解释,这可能是一个好主意
+名称空间是一个很棒的主意——让我们做更多这样的事情
+'''
+import jieba
+import collections
+
+
+def stats_text_en(text):
+    """Return the 100 most common English words in *text*.
+
+    The text is split on whitespace, the characters ``,.*-!`` are removed
+    from every token, and only non-empty, pure-ASCII tokens are counted.
+    Counting is case-sensitive.
+
+    Returns a list of (word, count) tuples, most frequent first.
+    Raises ValueError if *text* is not a str.
+    """
+    if not isinstance(text, str):
+        raise ValueError('参数应为字符串类型')
+
+    # One translate() pass strips every symbol instead of chained replace().
+    strip_symbols = str.maketrans('', '', ',.*-!')
+    words = []
+    for element in text.split():
+        element = element.translate(strip_symbols)
+        if element and element.isascii():
+            words.append(element)
+
+    return collections.Counter(words).most_common(100)
+
+
+def stats_text_cn(text):
+    """Return the 100 most common Chinese words in *text*.
+
+    The text is segmented with jieba; only segments of at least two
+    characters made up entirely of characters in U+4E00..U+9FFF are counted.
+
+    Returns a list of (word, count) tuples, most frequent first.
+    Raises ValueError if *text* is not a str.
+    """
+    if not isinstance(text, str):
+        raise ValueError('参数应为字符串类型')
+
+    cn_words = []
+    for word in jieba.cut(text, cut_all=False):
+        # Test every character: comparing the whole string with the range
+        # bounds is lexicographic, so it effectively checked only the first
+        # character and let segments with non-Chinese trailing text through.
+        if len(word) >= 2 and all('\u4e00' <= ch <= '\u9fff' for ch in word):
+            cn_words.append(word)
+
+    return collections.Counter(cn_words).most_common(100)
+
+
+def stats_text(text):
+    """Return the English word counts followed by the Chinese word counts.
+
+    Raises ValueError if *text* is not a str.
+    """
+    if not isinstance(text, str):
+        raise ValueError('参数应为字符串类型')
+
+    return stats_text_en(text) + stats_text_cn(text)
+
+
+if __name__ == '__main__':
+    print(stats_text(text))

From d4284a8c3986caa5ca2e94a35d17de567a2dfb51 Mon Sep 17 00:00:00 2001
From: YuiLovezuru <754555035@qq.com>
Date: Wed, 18 Sep 2019 19:03:55 +0800
Subject: [PATCH 3/4] Create main.py

---
 exercises/1901100092/d12/main.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 exercises/1901100092/d12/main.py

diff --git a/exercises/1901100092/d12/main.py b/exercises/1901100092/d12/main.py
new file mode 100644
index 000000000..503b1f766
--- /dev/null
+++ b/exercises/1901100092/d12/main.py
@@ -0,0 +1,24 @@
+from mymodule import stats_word
+from pyquery import PyQuery
+import requests
+import getpass
+from wxpy import Bot, SHARING, embed
+
+# cache_path=True turns on wxpy's session cache (default cache file).
+bot = Bot(cache_path=True)
+friend = bot.friends()
+
+
+@bot.register(friend, SHARING)
+def load_file(msg):
+    """Reply to a shared link with the Chinese word statistics of its page."""
+    # A timeout keeps a stalled download from blocking the handler forever.
+    response = requests.get(url=msg.url, timeout=10)
+
+    document = PyQuery(response.text)
+    content = document('#js_content').text()
+    msg.reply(str(stats_word.stats_text_cn(content)))
+
+
+# Enter wxpy's interactive shell; it blocks, which keeps the bot running.
+embed()

From 60ec64750e21a13b6bc1990bce5a637d4afc87c0 Mon Sep 17 00:00:00 2001
From: YuiLovezuru <754555035@qq.com>
Date: Wed, 18 Sep 2019 19:04:08 +0800
Subject: [PATCH 4/4] Create stats_word.py

---
.../1901100092/d12/mymodule/stats_word.py | 106 +++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 exercises/1901100092/d12/mymodule/stats_word.py

diff --git a/exercises/1901100092/d12/mymodule/stats_word.py b/exercises/1901100092/d12/mymodule/stats_word.py
new file mode 100644
index 000000000..79ce8bc9f
--- /dev/null
+++ b/exercises/1901100092/d12/mymodule/stats_word.py
@@ -0,0 +1,106 @@
+text = '''
+The Zen of Python, by Tim Peters
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambxiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!
+
+美丽总比丑陋好
+显式比隐式好
+简单总比复杂好
+复杂总比复杂好
+平的比嵌套的好
+稀疏总比密集好
+可读性
+特殊情况还不足以打破规则
+虽然实用性胜过纯洁性
+错误不应该悄无声息地过去
+除非显式地沉默
+面对敏锐,拒绝猜测的诱惑
+应该有一种——而且最好只有一种——显而易见的方法来做到这一点
+不过,除非你是荷兰人,否则这种方式一开始可能并不明显
+现在总比不做好
+虽然从来没有比“现在”更好
+如果实现很难解释,这是一个坏主意
+如果实现很容易解释,这可能是一个好主意
+名称空间是一个很棒的主意——让我们做更多这样的事情
+'''
+import jieba
+import collections
+
+
+def stats_text_en(text):
+    """Return the 100 most common English words in *text*.
+
+    The text is split on whitespace, the characters ``,.*-!`` are removed
+    from every token, and only non-empty, pure-ASCII tokens are counted.
+    Counting is case-sensitive.
+
+    Returns a list of (word, count) tuples, most frequent first.
+    Raises ValueError if *text* is not a str.
+    """
+    if not isinstance(text, str):
+        raise ValueError('参数应为字符串类型')
+
+    # One translate() pass strips every symbol instead of chained replace().
+    strip_symbols = str.maketrans('', '', ',.*-!')
+    words = []
+    for element in text.split():
+        element = element.translate(strip_symbols)
+        if element and element.isascii():
+            words.append(element)
+
+    return collections.Counter(words).most_common(100)
+
+
+def stats_text_cn(text):
+    """Return the 100 most common Chinese words in *text*.
+
+    The text is segmented with jieba; only segments of at least two
+    characters made up entirely of characters in U+4E00..U+9FFF are counted.
+
+    Returns a list of (word, count) tuples, most frequent first.
+    Raises ValueError if *text* is not a str.
+    """
+    if not isinstance(text, str):
+        raise ValueError('参数应为字符串类型')
+
+    cn_words = []
+    for word in jieba.cut(text, cut_all=False):
+        # Test every character: comparing the whole string with the range
+        # bounds is lexicographic, so it effectively checked only the first
+        # character and let segments with non-Chinese trailing text through.
+        if len(word) >= 2 and all('\u4e00' <= ch <= '\u9fff' for ch in word):
+            cn_words.append(word)
+
+    return collections.Counter(cn_words).most_common(100)
+
+
+def stats_text(text):
+    """Return the English word counts followed by the Chinese word counts.
+
+    Raises ValueError if *text* is not a str.
+    """
+    if not isinstance(text, str):
+        raise ValueError('参数应为字符串类型')
+
+    return stats_text_en(text) + stats_text_cn(text)
+
+
+if __name__ == '__main__':
+    print(stats_text(text))