# Recovered from git patch 1/2 "Create main.py"
# (commit 6b660d47fef8964a96f169bc731936b0ad76292b).
# Target path: exercises/1901100139/d10/main.py
"""Report the 100 most frequent Chinese characters found in tang300.json."""
import json
import logging
import re
from os import path

from mymodule import stats_word


logging.basicConfig(
    format='file:%(filename)s|line:%(lineno)d|message:%(message)s',
    level=logging.DEBUG)


def load_file():
    """Return the full text of ``tang300.json`` stored next to this script.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    file_path = path.join(path.dirname(path.abspath(__file__)), 'tang300.json')
    print('当前文件路径:', __file__, '\n读取文件路径:', file_path)
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()


def merge_poems(data):
    """Concatenate the ``contents`` value of every record in *data*.

    Args:
        data: iterable of mapping-like records; a record without a
            ``contents`` key contributes an empty string.

    Returns:
        One string holding all ``contents`` values in input order.
    """
    # str.join builds the result in a single pass instead of repeated `+=`.
    return ''.join(item.get('contents', '') for item in data)


def main():
    """Load the poems, merge their text and log the character statistics.

    Any failure is logged with its traceback rather than propagated, since
    this is the top-level entry point of the script.
    """
    try:
        records = json.loads(load_file())
        # Sanity-check the parse by logging the first record, if there is one.
        if records:
            logging.info(records[0])
        poems = merge_poems(records)
        logging.info('result ==> %s', stats_word.stats_text_cn(poems, 100))
    except Exception as e:
        logging.exception(e)


if __name__ == '__main__':
    main()

# Patch 2/2 "Create stats_word.py"
# (commit 428229f14ac0d2369e99be612ff4cad764bd67ca) creates
# exercises/1901100139/d10/mymodule/stats_word.py.
# Recovered from git patch 2/2 "Create stats_word.py".
# Target path: exercises/1901100139/d10/mymodule/stats_word.py
# Word- and character-frequency helpers for English and Chinese text.
from collections import Counter

# Deletes the punctuation characters ',.*-!' from a token in one pass.
_STRIP_EN_SYMBOLS = str.maketrans('', '', ',.*-!')


def stats_text_en(text, count):
    """Return the *count* most common ASCII words in *text*.

    The text is split on whitespace, the characters ``,.*-!`` are removed
    from every token, and tokens that end up empty or that contain any
    non-ASCII character are ignored. Counting is case-sensitive.

    Args:
        text: the string to analyse.
        count: maximum number of entries to return; passed straight to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, occurrences)`` tuples, most frequent first.
    """
    cleaned = (token.translate(_STRIP_EN_SYMBOLS) for token in text.split())
    words = Counter(word for word in cleaned if word and word.isascii())
    return words.most_common(count)


def stats_text_cn(text, count):
    """Return the *count* most common Chinese characters in *text*.

    Only characters in the range U+4E00..U+9FFF are counted; everything
    else (ASCII, punctuation, whitespace) is skipped.

    Args:
        text: the string to analyse.
        count: maximum number of entries to return; passed straight to
            ``Counter.most_common``.

    Returns:
        A list of ``(character, occurrences)`` tuples, most frequent first.
    """
    characters = Counter(ch for ch in text if '\u4e00' <= ch <= '\u9fff')
    return characters.most_common(count)


def stats_text(text, count):
    """Combine the English and Chinese frequency results for *text*.

    Returns:
        The ``stats_text_en`` result followed by the ``stats_text_cn``
        result, so the list may hold up to ``2 * count`` entries.
    """
    return stats_text_en(text, count) + stats_text_cn(text, count)