Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions exercises/1901010132/day10/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from mymodule import stats_word
import json

# Resolve tang300.json relative to this script so the program does not depend
# on one particular machine's directory layout (the data file lives in the
# same day10 directory as this script).
from pathlib import Path

data_path = Path(__file__).resolve().parent / 'tang300.json'
with open(data_path, encoding='UTF-8') as f:
    read_data = f.read()

try:
    # Print the 20 most frequent Chinese words found in the file.
    print('统计前20的词频数: \n', stats_word.stats_text_cn(read_data, 20))
except ValueError as e:
    # stats_text_cn raises ValueError for non-string input; report it.
    print(e)
50 changes: 50 additions & 0 deletions exercises/1901010132/day10/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Sample passage containing an English paragraph followed by a Chinese
# paragraph; passed to stats_text() by the module-level call at the end of
# this module.
text= '''
"Her eyes beginning to water, she went on,
"So I would like you all to make me a promise:
from now on, on your way to school,
or on your way home, find something beautiful
to notice. It doesn' t have to be something you
see -it could be a scent perhaps of freshly
baked bread wafting out of someone 's house,
or it could be the sound of the breeze
slightly rustling the leaves in the trees,
or the way the morning light catches one
autumn leaf as it falls gently to the ground.
Please, look for these things, and remember them."
  她的眼睛开始湿润了,她接着说因此我想让你们每个人答应我:
从今以后,在你上学或者放学的路上,要发现一些美丽的事物。
它不一定是你看到的某个东西——它可能是一种香味——
也许是新鲜烤面包的味道从某一座房里飘出来,也许是微风轻拂树叶的声音,
或者是晨光照射在轻轻飘落的秋叶上的方式。请你们寻找这些东西并且记住它们吧。
'''
import re
import collections
import jieba #结巴中文分词
# Module-level default for the number of entries to report (same value as int()).
count = 0
def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    Every character that is not an ASCII letter is treated as a separator,
    so digits, punctuation and Chinese characters never become part of a
    word. Counting is case-sensitive.

    Args:
        text: The string to analyse (str or a str subclass).
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples ordered from most to least
        frequent, as produced by ``collections.Counter.most_common``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    # isinstance (rather than an exact type comparison) keeps this check in
    # line with stats_text_cn and accepts str subclasses.
    if not isinstance(text, str):
        raise ValueError('文本为非字符串')
    words = re.sub(r'[^A-Za-z]', ' ', text).split()
    return collections.Counter(words).most_common(count)

def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Only characters in the range U+4E00..U+9FA5 are kept; they are joined
    into one string, segmented with jieba in accurate mode
    (``cut_all=False``), and segments shorter than two characters are
    dropped before counting.

    Args:
        text: The string to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples from
        ``collections.Counter.most_common``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('文本为非字符串')
    han_chars = re.findall(u'[\u4e00-\u9fa5]', text)
    han_text = ''.join(han_chars)
    # jieba.cut yields segments lazily; keep only those of length >= 2.
    words = [
        segment
        for segment in jieba.cut(han_text, cut_all=False)
        if len(segment) >= 2
    ]
    return collections.Counter(words).most_common(count)

def stats_text(text, count):
    """Return word-frequency statistics for both Chinese and English text.

    Args:
        text: The string to analyse.
        count: Maximum number of entries requested from each language.

    Returns:
        A list of ``(word, occurrences)`` tuples: the result of
        ``stats_text_cn`` followed by the result of ``stats_text_en``.
        (Previously both results were computed and discarded, so the
        function always returned ``None``.)

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('文本为非字符串')
    chinese_stats = stats_text_cn(text, count)
    english_stats = stats_text_en(text, count)
    return chinese_stats + english_stats
# Run stats_text once on the bundled sample passage when the module is
# loaded; the value it returns is not used.
stats_text(text, count)
27 changes: 27 additions & 0 deletions exercises/1901010132/day11/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import requests
import pyquery
from pyquery import PyQuery
from mymodule import stats_word
import getpass
import yagmail
# 抓取网页
# Fetch the article page. A timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() stops it from analysing (and
# mailing) an HTTP error page.
r = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA', timeout=30)
r.raise_for_status()

# Extract the article body text from the element with id "js_content".
document = PyQuery(r.text)
content = document('#js_content').text()
print(content)

# Compute the 100 most frequent Chinese words and turn the result into a str
# so it can be used as the mail body.
word_list = stats_word.stats_text_cn(content, 100)
word_str = str(word_list)
print(word_str)

# Send the statistics to the requested mailbox.
sender = input('请输入发件人邮箱:')
psw = getpass.getpass()  # mailbox authorization code, read without echo
recipients = input('输入收件人邮箱:')  # target mailbox, e.g. pythoncamp@163.com
smtp = "smtp.qq.com"  # SMTP server address

yagmail.SMTP(sender, psw, smtp).send(recipients, '【1901010132】自学训练营学习2群DAY11 bamboo', word_str)
50 changes: 50 additions & 0 deletions exercises/1901010132/day11/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Sample passage containing an English paragraph followed by a Chinese
# paragraph; passed to stats_text() by the module-level call at the end of
# this module.
text= '''
"Her eyes beginning to water, she went on,
"So I would like you all to make me a promise:
from now on, on your way to school,
or on your way home, find something beautiful
to notice. It doesn' t have to be something you
see -it could be a scent perhaps of freshly
baked bread wafting out of someone 's house,
or it could be the sound of the breeze
slightly rustling the leaves in the trees,
or the way the morning light catches one
autumn leaf as it falls gently to the ground.
Please, look for these things, and remember them."
  她的眼睛开始湿润了,她接着说因此我想让你们每个人答应我:
从今以后,在你上学或者放学的路上,要发现一些美丽的事物。
它不一定是你看到的某个东西——它可能是一种香味——
也许是新鲜烤面包的味道从某一座房里飘出来,也许是微风轻拂树叶的声音,
或者是晨光照射在轻轻飘落的秋叶上的方式。请你们寻找这些东西并且记住它们吧。
'''
import re
import collections
import jieba #结巴中文分词
# Module-level default for the number of entries to report (same value as int()).
count = 0
def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    Every character that is not an ASCII letter is treated as a separator,
    so digits, punctuation and Chinese characters never become part of a
    word. Counting is case-sensitive.

    Args:
        text: The string to analyse (str or a str subclass).
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples ordered from most to least
        frequent, as produced by ``collections.Counter.most_common``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    # isinstance (rather than an exact type comparison) keeps this check in
    # line with stats_text_cn and accepts str subclasses.
    if not isinstance(text, str):
        raise ValueError('文本为非字符串')
    words = re.sub(r'[^A-Za-z]', ' ', text).split()
    return collections.Counter(words).most_common(count)

def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Only characters in the range U+4E00..U+9FA5 are kept; they are joined
    into one string, segmented with jieba in accurate mode
    (``cut_all=False``), and segments shorter than two characters are
    dropped before counting.

    Args:
        text: The string to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples from
        ``collections.Counter.most_common``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('文本为非字符串')
    han_chars = re.findall(u'[\u4e00-\u9fa5]', text)
    han_text = ''.join(han_chars)
    # jieba.cut yields segments lazily; keep only those of length >= 2.
    words = [
        segment
        for segment in jieba.cut(han_text, cut_all=False)
        if len(segment) >= 2
    ]
    return collections.Counter(words).most_common(count)

def stats_text(text, count):
    """Return word-frequency statistics for both Chinese and English text.

    Args:
        text: The string to analyse.
        count: Maximum number of entries requested from each language.

    Returns:
        A list of ``(word, occurrences)`` tuples: the result of
        ``stats_text_cn`` followed by the result of ``stats_text_en``.
        (Previously both results were computed and discarded, so the
        function always returned ``None``.)

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('文本为非字符串')
    chinese_stats = stats_text_cn(text, count)
    english_stats = stats_text_en(text, count)
    return chinese_stats + english_stats
# Run stats_text once on the bundled sample passage when the module is
# loaded; the value it returns is not used.
stats_text(text, count)
8 changes: 8 additions & 0 deletions exercises/1901010132/day9/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from mymodule import stats_word
import json

# Resolve tang300.json relative to this script so the program does not depend
# on one particular machine's directory layout (the data file lives in the
# same day9 directory as this script).
from pathlib import Path

data_path = Path(__file__).resolve().parent / 'tang300.json'
with open(data_path, encoding='UTF-8') as f:
    text = f.read()

# Report the 100 most frequent Chinese characters in the file.
stats_word.stats_text_cn(text, 100)
42 changes: 42 additions & 0 deletions exercises/1901010132/day9/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Sample passage containing an English paragraph followed by a Chinese
# paragraph.
text= '''
"Her eyes beginning to water, she went on,
"So I would like you all to make me a promise:
from now on, on your way to school,
or on your way home, find something beautiful
to notice. It doesn' t have to be something you
see -it could be a scent perhaps of freshly
baked bread wafting out of someone 's house,
or it could be the sound of the breeze
slightly rustling the leaves in the trees,
or the way the morning light catches one
autumn leaf as it falls gently to the ground.
Please, look for these things, and remember them."
  她的眼睛开始湿润了,她接着说因此我想让你们每个人答应我:
从今以后,在你上学或者放学的路上,要发现一些美丽的事物。
它不一定是你看到的某个东西——它可能是一种香味——
也许是新鲜烤面包的味道从某一座房里飘出来,也许是微风轻拂树叶的声音,
或者是晨光照射在轻轻飘落的秋叶上的方式。请你们寻找这些东西并且记住它们吧。
'''
import re
import collections
# Module-level default for the number of entries to report (same value as int()).
count = 0
def stats_text_en(text, count):
    """Print the ``count`` most frequent English words in ``text``.

    Every character that is not an ASCII letter is replaced by a space so
    that it acts as a word separator. (Replacing it with an empty string,
    as before, also removed the whitespace between words and collapsed the
    whole text into a single "word".) Counting is case-sensitive.

    Args:
        text: The string to analyse.
        count: Maximum number of entries to print.

    Returns:
        None. The list of ``(word, occurrences)`` tuples is printed.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("文本为非字符串")
    words = re.sub('[^a-zA-Z]', ' ', text).split()
    print(collections.Counter(words).most_common(count))

def stats_text_cn(text, count):
    """Print the ``count`` most frequent Chinese characters in ``text``.

    All characters outside the range U+4E00..U+9FA5 are removed, and each
    remaining character is counted individually.

    Args:
        text: The string to analyse; must be exactly of type ``str``.
        count: Maximum number of entries to print.

    Returns:
        None. The list of ``(character, occurrences)`` tuples is printed.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if type(text) is not str:
        raise ValueError("文本为非字符串")
    # '[^\u4e00-\u9fa5]' matches every non-Chinese character.
    han_only = re.sub('[^\u4e00-\u9fa5]', '', text)
    # Counting the string directly tallies it character by character.
    frequencies = collections.Counter(han_only)
    print(frequencies.most_common(count))

def stats_word(text, count):
    """Report English word and Chinese character frequencies for ``text``.

    Delegates to ``stats_text_en`` and then ``stats_text_cn``, in that
    order, passing ``text`` and ``count`` through unchanged.

    Args:
        text: The string to analyse; must be exactly of type ``str``.
        count: Maximum number of entries per report.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if type(text) is not str:
        raise ValueError("文本为非字符串")
    for report in (stats_text_en, stats_text_cn):
        report(text, count)