Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions exercises/1901100092/d11/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from mymodule import stats_word
from pyquery import PyQuery
import requests
import getpass
import yagmail

def load_file():

response = requests.get(url = 'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')

document = PyQuery(response.text)
content = document('#js_content').text()
return content

#text=1 #验证参数检查功能是否生效
try:
i = stats_word.stats_text_cn(load_file())
print(i)
except ValueError:
print('stats_text参数应为字符串类型')

sender = input('输入发件人邮箱:')
password_1 = getpass.getpass('输入发件人邮箱密码:')
recipients = input('输入收件人邮箱:')

yag = yagmail.SMTP(user = sender,password = password_1,host = 'smtp.qq.com')
yag.send(to = recipients,subject = "【1901100092】自学训练营学习15群DAY11 Yui",contents = str(i))
90 changes: 90 additions & 0 deletions exercises/1901100092/d11/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
text = '''
The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambxiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

美丽总比丑陋好
显式比隐式好
简单总比复杂好
复杂总比复杂好
平的比嵌套的好
稀疏总比密集好
可读性
特殊情况还不足以打破规则
虽然实用性胜过纯洁性
错误不应该悄无声息地过去
除非显式地沉默
面对敏锐,拒绝猜测的诱惑
应该有一种——而且最好只有一种——显而易见的方法来做到这一点
不过,除非你是荷兰人,否则这种方式一开始可能并不明显
现在总比不做好
虽然从来没有比“现在”更好
如果实现很难解释,这是一个坏主意
如果实现很容易解释,这可能是一个好主意
名称空间是一个很棒的主意——让我们做更多这样的事情
'''
import jieba
import collections


def stats_text_en(text):
if not isinstance(text,str):
raise ValueError('参数应为字符串类型')

elements=text.split() #整理字符
words=[] #建立列表
symbols=',.*-!'
for element in elements:
for symbol in symbols:
element=element.replace(symbol,'') #清理符号
if len(element) and element.isascii():
words.append(element) #将字符加入words列表

l_en = collections.Counter(words).most_common(100)
return l_en

def stats_text_cn(text):
if not isinstance(text,str):
raise ValueError('参数应为字符串类型')

seg_list = jieba.cut(text,cut_all=False)

cn_characters = [] #建立一个空列表,为下一个循环使用
for i in seg_list:
if '\u4e00' <= i <= '\u9fff' and len(i) >= 2:
cn_characters.append(i)

l_en_cn = collections.Counter(cn_characters).most_common(100)
return l_en_cn



def stats_text(text):
'''输出合并词频统计结果'''
if not isinstance(text,str):
raise ValueError('参数应为字符串类型')

return stats_text_en(text) + stats_text_cn(text)

if __name__ =='__main__':
print(stats_text(text))



17 changes: 17 additions & 0 deletions exercises/1901100092/d12/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from mymodule import stats_word
from pyquery import PyQuery
import requests
import getpass
from wxpy import *

bot = Bot(cache_path = True)
friend = bot.friends()
@bot.register(friend,SHARING)
def load_file(msg):
response = requests.get(url = msg.url)

document = PyQuery(response.text)
content = document('#js_content').text()
i = str(stats_word.stats_text_cn(content))
msg.reply(i)
embed()
90 changes: 90 additions & 0 deletions exercises/1901100092/d12/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
text = '''
The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambxiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

美丽总比丑陋好
显式比隐式好
简单总比复杂好
复杂总比复杂好
平的比嵌套的好
稀疏总比密集好
可读性
特殊情况还不足以打破规则
虽然实用性胜过纯洁性
错误不应该悄无声息地过去
除非显式地沉默
面对敏锐,拒绝猜测的诱惑
应该有一种——而且最好只有一种——显而易见的方法来做到这一点
不过,除非你是荷兰人,否则这种方式一开始可能并不明显
现在总比不做好
虽然从来没有比“现在”更好
如果实现很难解释,这是一个坏主意
如果实现很容易解释,这可能是一个好主意
名称空间是一个很棒的主意——让我们做更多这样的事情
'''
import jieba
import collections


def stats_text_en(text):
if not isinstance(text,str):
raise ValueError('参数应为字符串类型')

elements=text.split() #整理字符
words=[] #建立列表
symbols=',.*-!'
for element in elements:
for symbol in symbols:
element=element.replace(symbol,'') #清理符号
if len(element) and element.isascii():
words.append(element) #将字符加入words列表

l_en = collections.Counter(words).most_common(100)
return l_en

def stats_text_cn(text):
if not isinstance(text,str):
raise ValueError('参数应为字符串类型')

seg_list = jieba.cut(text,cut_all=False)

cn_characters = [] #建立一个空列表,为下一个循环使用
for i in seg_list:
if '\u4e00' <= i <= '\u9fff' and len(i) >= 2:
cn_characters.append(i)

l_en_cn = collections.Counter(cn_characters).most_common(100)
return l_en_cn



def stats_text(text):
'''输出合并词频统计结果'''
if not isinstance(text,str):
raise ValueError('参数应为字符串类型')

return stats_text_en(text) + stats_text_cn(text)

if __name__ =='__main__':
print(stats_text(text))