From c9a62f13257f9aea43e40fc9b27b83e998697f6d Mon Sep 17 00:00:00 2001
From: wanghui333
Date: Fri, 30 Aug 2019 15:44:42 +0800
Subject: [PATCH 1/2] Create main.py

---
 exercises/1901090061/d12/main.py | 37 ++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 exercises/1901090061/d12/main.py

diff --git a/exercises/1901090061/d12/main.py b/exercises/1901090061/d12/main.py
new file mode 100644
index 000000000..9532b14f0
--- /dev/null
+++ b/exercises/1901090061/d12/main.py
@@ -0,0 +1,37 @@
+import logging
+import requests
+import pyquery
+from wxpy import Bot, SHARING, embed
+from mymodule import stats_word
+
+logging.basicConfig(format='file:%(filename)s|line:%(lineno)d|%(message)s',level=logging.DEBUG)
+
+
+def get_artical(url):
+    """Fetch `url` and return the text of the element matched by '#js_content'."""
+    # A timeout keeps an unresponsive server from blocking the caller forever.
+    r = requests.get(url, timeout=10)
+    # Raise on HTTP error statuses instead of parsing an error page.
+    r.raise_for_status()
+    document = pyquery.PyQuery(r.text)
+    return document('#js_content').text()
+
+
+def main():
+    """Create a Bot, register the SHARING handler for its friends, then call embed()."""
+    bot = Bot()
+    my_friend = bot.friends()
+
+    @bot.register(my_friend, SHARING)
+    def handler(msg):
+        """Return the 100 most common words of the article behind msg.url."""
+        artical = get_artical(msg.url)
+        result = stats_word.stats_text_cn(artical, 100)
+        print(result)
+        return result
+
+    embed()
+
+
+if __name__ == '__main__':
+    main()

From dc734d001255407a33cde94122130974d9777fd8 Mon Sep 17 00:00:00 2001
From: wanghui333
Date: Fri, 30 Aug 2019 15:44:45 +0800
Subject: [PATCH 2/2] Create stats_word.py

---
 .../1901090061/d12/mymodule/stats_word.py | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 exercises/1901090061/d12/mymodule/stats_word.py

diff --git a/exercises/1901090061/d12/mymodule/stats_word.py b/exercises/1901090061/d12/mymodule/stats_word.py
new file mode 100644
index 000000000..e8e097749
--- /dev/null
+++ b/exercises/1901090061/d12/mymodule/stats_word.py
@@ -0,0 +1,45 @@
+from collections import Counter
+import jieba
+
+
+# Count how often each English word occurs in the text.
+def stats_text_en(text,count):
+    """Return the `count` most common ASCII words of `text` as (word, n) pairs."""
+    # A single translate() pass strips the punctuation from every word.
+    strip_symbols = str.maketrans('', '', ',.*-!')
+    words = []
+    for element in text.split():
+        element = element.translate(strip_symbols)
+        # str.isascii() decides whether the word counts as English.
+        if element and element.isascii():
+            words.append(element)
+    return Counter(words).most_common(count)
+
+
+# Count how often each Chinese word occurs in the text.
+def stats_text_cn(text,count):
+    """Return the `count` most common Chinese words of `text` as (word, n) pairs.
+
+    jieba splits the text into tokens; tokens shorter than two characters and
+    tokens without a CJK character are skipped, so that ASCII words already
+    handled by stats_text_en are not reported again.
+    """
+    characters_cn = []
+    for character in jieba.cut(text,cut_all=False):
+        has_cjk = any('\u4e00' <= ch <= '\u9fff' for ch in character)
+        if len(character) >= 2 and has_cjk:
+            characters_cn.append(character)
+    return Counter(characters_cn).most_common(count)
+
+
+# Call stats_text_en and stats_text_cn on the same text.
+def stats_text(text,count):
+    """
+    Call stats_text_en and stats_text_cn and return both result lists
+    concatenated, English entries first.
+
+    Raises ValueError if `text` is not a str.
+    """
+    if not isinstance(text,str):
+        raise ValueError('输入参数必须为str类型,当前输入类型为%s'%type(text))
+    return stats_text_en(text,count) + stats_text_cn(text,count)