From 6c4d87b5c44391d9168eeda54dd4787bea597639 Mon Sep 17 00:00:00 2001 From: PassionPit <53104220+PassionPit@users.noreply.github.com> Date: Fri, 30 Aug 2019 17:21:23 +0800 Subject: [PATCH 1/6] Create stats_word.py --- .../1901100139/d13/mymodule/stats_word.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 exercises/1901100139/d13/mymodule/stats_word.py diff --git a/exercises/1901100139/d13/mymodule/stats_word.py b/exercises/1901100139/d13/mymodule/stats_word.py new file mode 100644 index 000000000..0742f13be --- /dev/null +++ b/exercises/1901100139/d13/mymodule/stats_word.py @@ -0,0 +1,25 @@ +from collections import Counter +import jieba +def stats_text_en(text,count): + elements = text.split() + words = [] + symbols = ',.*-!' + for element in elements: + for symbol in symbols: + element = element.replace(symbol,'') + if len(element) and element.isascii(): + words.append(element) + return Counter(words).most_common(count) +def stats_text_cn(text,count): + words=jieba.cut(text) + tmp=[] + for i in words: + if len(i)>1: + tmp.append(i) + return Counter(tmp).most_common(count) + +def stats_text(text,count): + ''' + 合并中英词频的结果 + ''' + return stats_text_en(text,count) + stats_text_cn(text,count) \ No newline at end of file From 74a2a705eeb1bdb8b4a65c643c6514d0e3c691a0 Mon Sep 17 00:00:00 2001 From: PassionPit <53104220+PassionPit@users.noreply.github.com> Date: Fri, 30 Aug 2019 17:21:26 +0800 Subject: [PATCH 2/6] Create main.py --- exercises/1901100139/d13/main.py | 60 ++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 exercises/1901100139/d13/main.py diff --git a/exercises/1901100139/d13/main.py b/exercises/1901100139/d13/main.py new file mode 100644 index 000000000..68efc3ca8 --- /dev/null +++ b/exercises/1901100139/d13/main.py @@ -0,0 +1,60 @@ +from os import path +import logging +import requests +import matplotlib.pyplot as plt +import pyquery +from wxpy import * +from mymodule import stats_word + +cwd=path.abspath(path.dirname(__file__)) + +plt.rcParams['font.sans-serif']='simHei' + +logging.basicConfig(format='file:%(filename)s|line:%(lineno)d|message:%(message)s',level=logging.DEBUG) + +def get_article(url): + r=requests.get(url) + document= pyquery.PyQuery(r.text) + return document('#js_content').text() + +def generate_image(data,image_path): + labels=[v[0] for v in data] + widths=[v[1] for v in data] + ypos = range(len(data)) + fig,ax=plt.subplots() + ax.barh(ypos,widths) + ax.set_yticks(ypos) + ax.set_yticklabels(labels) + ax.invert_yaxis() + ax.set_ylabel('关键字') + ax.set_xlabel('词频') + ax.set_title('词频统计') + fig.savefig(image_path,bbox_inches='tight') + +def main(): + bot=Bot() + friends=bot.friends() + + @bot.register(friends,SHARING) + def handler(msg): + try: + logging.info('sharing url=%s',msg.url) + article=get_article(msg.url) + result=stats_word.stats_text_cn(article,20) + image_path=path.join(cwd,'stats.png') + generate_image(result,image_path) + msg.reply_image(image_path) + except Exception as e: + logging.exception(e) + embed() + +def test(): + article=get_article('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA') + result=stats_word.stats_text_cn(article,20) + image_path=path.join(cwd,'stats.png') + generate_image(result,image_path) + +if __name__=='__main__': + test() + + From 16fdb51e6458e70d7471d9681240e6ff641a4496 Mon Sep 17 00:00:00 2001 From: PassionPit <53104220+PassionPit@users.noreply.github.com> Date: Fri, 30 Aug 2019 17:21:29 +0800 Subject: [PATCH 3/6] Update stats_word.py --- exercises/1901100139/d12/mymodule/stats_word.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/exercises/1901100139/d12/mymodule/stats_word.py b/exercises/1901100139/d12/mymodule/stats_word.py index a1b269578..0742f13be 100644 --- a/exercises/1901100139/d12/mymodule/stats_word.py +++ b/exercises/1901100139/d12/mymodule/stats_word.py @@ -1,4 +1,5 @@ from collections import Counter +import jieba def stats_text_en(text,count): elements = text.split() words = [] @@ -10,11 +11,13 @@ def stats_text_en(text,count): words.append(element) return Counter(words).most_common(count) def stats_text_cn(text,count): - cn_characters = [] - for character in text: - if '\u4e00' <= character <= '\u9fff': - cn_characters.append(character) - return Counter(cn_characters).most_common(count) + words=jieba.cut(text) + tmp=[] + for i in words: + if len(i)>1: + tmp.append(i) + return Counter(tmp).most_common(count) + def stats_text(text,count): ''' 合并中英词频的结果 From 97e68a5452a5a379f820bd390f233032cb87cc59 Mon Sep 17 00:00:00 2001 From: PassionPit <53104220+PassionPit@users.noreply.github.com> Date: Fri, 30 Aug 2019 17:21:31 +0800 Subject: [PATCH 4/6] Update main.py --- exercises/1901100139/d12/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercises/1901100139/d12/main.py b/exercises/1901100139/d12/main.py index 9e58a1533..60d86f52d 100644 --- a/exercises/1901100139/d12/main.py +++ b/exercises/1901100139/d12/main.py @@ -25,7 +25,7 @@ def handler(msg): msg.reply(str(result)) except Exception as e: logging.exception(e) - embed + embed() if __name__=='__main__': From a2d117b01cb831abd8641fc6867d970a54c8ab1e Mon Sep 17 00:00:00 2001 From: PassionPit <53104220+PassionPit@users.noreply.github.com> Date: Fri, 30 Aug 2019 17:21:34 +0800 Subject: [PATCH 5/6] Update stats_word.py --- exercises/1901100139/d11/mymodule/stats_word.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/exercises/1901100139/d11/mymodule/stats_word.py b/exercises/1901100139/d11/mymodule/stats_word.py index a1b269578..0742f13be 100644 --- a/exercises/1901100139/d11/mymodule/stats_word.py +++ b/exercises/1901100139/d11/mymodule/stats_word.py @@ -1,4 +1,5 @@ from collections import Counter +import jieba def stats_text_en(text,count): elements = text.split() words = [] @@ -10,11 +11,13 @@ def stats_text_en(text,count): words.append(element) return Counter(words).most_common(count) def stats_text_cn(text,count): - cn_characters = [] - for character in text: - if '\u4e00' <= character <= '\u9fff': - cn_characters.append(character) - return Counter(cn_characters).most_common(count) + words=jieba.cut(text) + tmp=[] + for i in words: + if len(i)>1: + tmp.append(i) + return Counter(tmp).most_common(count) + def stats_text(text,count): ''' 合并中英词频的结果 From 6d43aded88fad62253c68a3b95682b159e1ca29b Mon Sep 17 00:00:00 2001 From: PassionPit <53104220+PassionPit@users.noreply.github.com> Date: Fri, 30 Aug 2019 17:21:37 +0800 Subject: [PATCH 6/6] Update stats_word.py --- exercises/1901100139/d10/mymodule/stats_word.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/exercises/1901100139/d10/mymodule/stats_word.py b/exercises/1901100139/d10/mymodule/stats_word.py index a1b269578..0742f13be 100644 --- a/exercises/1901100139/d10/mymodule/stats_word.py +++ b/exercises/1901100139/d10/mymodule/stats_word.py @@ -1,4 +1,5 @@ from collections import Counter +import jieba def stats_text_en(text,count): elements = text.split() words = [] @@ -10,11 +11,13 @@ def stats_text_en(text,count): words.append(element) return Counter(words).most_common(count) def stats_text_cn(text,count): - cn_characters = [] - for character in text: - if '\u4e00' <= character <= '\u9fff': - cn_characters.append(character) - return Counter(cn_characters).most_common(count) + words=jieba.cut(text) + tmp=[] + for i in words: + if len(i)>1: + tmp.append(i) + return Counter(tmp).most_common(count) + def stats_text(text,count): ''' 合并中英词频的结果