Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions exercises/1901100139/d10/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import Counter
import jieba
def stats_text_en(text,count):
elements = text.split()
words = []
Expand All @@ -10,11 +11,13 @@ def stats_text_en(text,count):
words.append(element)
return Counter(words).most_common(count)
def stats_text_cn(text,count):
cn_characters = []
for character in text:
if '\u4e00' <= character <= '\u9fff':
cn_characters.append(character)
return Counter(cn_characters).most_common(count)
words=jieba.cut(text)
tmp=[]
for i in words:
if len(i)>1:
tmp.append(i)
return Counter(tmp).most_common(count)

def stats_text(text,count):
'''
合并中英词频的结果
Expand Down
13 changes: 8 additions & 5 deletions exercises/1901100139/d11/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import Counter
import jieba
def stats_text_en(text,count):
elements = text.split()
words = []
Expand All @@ -10,11 +11,13 @@ def stats_text_en(text,count):
words.append(element)
return Counter(words).most_common(count)
def stats_text_cn(text,count):
cn_characters = []
for character in text:
if '\u4e00' <= character <= '\u9fff':
cn_characters.append(character)
return Counter(cn_characters).most_common(count)
words=jieba.cut(text)
tmp=[]
for i in words:
if len(i)>1:
tmp.append(i)
return Counter(tmp).most_common(count)

def stats_text(text,count):
'''
合并中英词频的结果
Expand Down
2 changes: 1 addition & 1 deletion exercises/1901100139/d12/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def handler(msg):
msg.reply(str(result))
except Exception as e:
logging.exception(e)
embed
embed()


if __name__=='__main__':
Expand Down
13 changes: 8 additions & 5 deletions exercises/1901100139/d12/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import Counter
import jieba
def stats_text_en(text,count):
elements = text.split()
words = []
Expand All @@ -10,11 +11,13 @@ def stats_text_en(text,count):
words.append(element)
return Counter(words).most_common(count)
def stats_text_cn(text,count):
cn_characters = []
for character in text:
if '\u4e00' <= character <= '\u9fff':
cn_characters.append(character)
return Counter(cn_characters).most_common(count)
words=jieba.cut(text)
tmp=[]
for i in words:
if len(i)>1:
tmp.append(i)
return Counter(tmp).most_common(count)

def stats_text(text,count):
'''
合并中英词频的结果
Expand Down
60 changes: 60 additions & 0 deletions exercises/1901100139/d13/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from os import path
import logging
import requests
import matplotlib.pyplot as plt
import pyquery
from wxpy import *
from mymodule import stats_word

# Absolute path of the directory that contains this script; the chart image
# is written next to it.
cwd=path.abspath(path.dirname(__file__))

# Use the SimHei font for sans-serif text.
# NOTE(review): presumably chosen so the Chinese axis labels and title render
# correctly -- confirm the font is installed on the target machine.
plt.rcParams['font.sans-serif']='simHei'

# Log at DEBUG level; each record shows the file name, line number and message.
logging.basicConfig(format='file:%(filename)s|line:%(lineno)d|message:%(message)s',level=logging.DEBUG)

def get_article(url, timeout=10):
    """Download a page and return the text of its ``#js_content`` element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The result of ``.text()`` on the ``#js_content`` selection of the
        parsed response body.

    Raises:
        requests.RequestException: If the connection fails, the timeout
            expires, or the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of counting the words of an error page.
    response.raise_for_status()
    document = pyquery.PyQuery(response.text)
    return document('#js_content').text()

def generate_image(data, image_path):
    """Draw ``data`` as a horizontal bar chart and save it to ``image_path``.

    Args:
        data: Sized sequence of pairs. Item 0 of each pair is used as the
            bar label and item 1 as the bar width.
        image_path: Destination handed to ``fig.savefig``.
    """
    labels = [pair[0] for pair in data]
    widths = [pair[1] for pair in data]
    ypos = range(len(data))
    fig, ax = plt.subplots()
    try:
        ax.barh(ypos, widths)
        ax.set_yticks(ypos)
        ax.set_yticklabels(labels)
        # Flip the axis so the first entry of ``data`` is drawn at the top.
        ax.invert_yaxis()
        ax.set_ylabel('关键字')
        ax.set_xlabel('词频')
        ax.set_title('词频统计')
        fig.savefig(image_path, bbox_inches='tight')
    finally:
        # pyplot keeps every figure it creates alive until it is closed.
        # The message handler calls this function once per shared article,
        # so leaving figures open would grow memory for the life of the bot.
        plt.close(fig)

def main():
    """Create a wxpy ``Bot`` and reply to shared articles with a chart.

    A handler is registered for ``SHARING`` messages from the bot's friends.
    For each message it fetches the shared URL, computes the top 20 entries
    with ``stats_word.stats_text_cn``, renders them to ``stats.png`` next to
    this script and replies with that image. Any exception raised while
    handling a message is logged and not propagated.
    """
    robot = Bot()
    contacts = robot.friends()

    @robot.register(contacts, SHARING)
    def handler(msg):
        try:
            logging.info('sharing url=%s', msg.url)
            ranking = stats_word.stats_text_cn(get_article(msg.url), 20)
            chart_file = path.join(cwd, 'stats.png')
            generate_image(ranking, chart_file)
            msg.reply_image(chart_file)
        except Exception as exc:
            logging.exception(exc)

    # NOTE(review): ``embed`` presumably comes from the ``wxpy`` star import
    # and keeps the process alive so the handler can run -- confirm.
    embed()

def test():
    """Run the fetch, count and chart steps once for a fixed article URL.

    The chart of the top 20 entries is written to ``stats.png`` next to this
    script. No bot is involved.
    """
    sample_url = 'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA'
    top_words = stats_word.stats_text_cn(get_article(sample_url), 20)
    generate_image(top_words, path.join(cwd, 'stats.png'))

# Script entry point: runs the one-shot test() pipeline. main() (the bot) is
# not started from here.
if __name__=='__main__':
    test()


25 changes: 25 additions & 0 deletions exercises/1901100139/d13/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from collections import Counter
import jieba
def stats_text_en(text, count):
    """Return the ``count`` most frequent ASCII words in ``text``.

    The text is split on whitespace and the characters ``,.*-!`` are removed
    from every piece. Pieces that end up empty or that contain any non-ASCII
    character are ignored.

    Args:
        text: String to analyse.
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, occurrences)`` tuples as produced by
        ``Counter.most_common``.
    """
    strip_table = str.maketrans('', '', ',.*-!')
    cleaned = (chunk.translate(strip_table) for chunk in text.split())
    tally = Counter(word for word in cleaned if word and word.isascii())
    return tally.most_common(count)
def stats_text_cn(text, count):
    """Return the ``count`` most frequent multi-character segments of ``text``.

    The text is segmented with ``jieba.cut``; only segments longer than one
    character are counted.

    Args:
        text: String to analyse.
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(segment, occurrences)`` tuples as produced by
        ``Counter.most_common``.
    """
    tally = Counter(token for token in jieba.cut(text) if len(token) > 1)
    return tally.most_common(count)

def stats_text(text, count):
    """Combine the English and Chinese word-frequency results.

    Returns the list from ``stats_text_en`` followed by the list from
    ``stats_text_cn``, each computed with the same ``text`` and ``count``.
    """
    english_part = stats_text_en(text, count)
    chinese_part = stats_text_cn(text, count)
    return english_part + chinese_part