diff --git a/exercises/1901100050/d08/main.py b/exercises/1901100050/d08/main.py
index e26bda28b..6be61f0bd 100644
--- a/exercises/1901100050/d08/main.py
+++ b/exercises/1901100050/d08/main.py
@@ -15,7 +15,9 @@ def test_traceback():
 
 def test_logger():
     try:
         stats_word.stats_text(1)
-    except exception as e:
+
+    except Exception as e:
+        # print('test_logger =>',e)
         logger.exception(e)
 
diff --git a/exercises/1901100050/d09/main.py b/exercises/1901100050/d09/main.py
new file mode 100644
index 000000000..72eaad146
--- /dev/null
+++ b/exercises/1901100050/d09/main.py
@@ -0,0 +1,35 @@
+from mymodule import stats_word
+from os import path
+import json
+import re
+import logging
+
+logging.basicConfig(
+    format='file:%(filename)s|line:%(lineno)d|message:%(message)s', level=logging.DEBUG)
+
+def load_file():
+    file_path = path.join(path.dirname(path.abspath(__file__)),'tang300.json')
+    print('当前文件路径:', __file__, '\n读取文件路径:', file_path)
+
+    with open(file_path, 'r', encoding='utf-8') as f:
+        return f.read()
+
+def merge_poems(date):
+    poems = ''
+    for item in date:
+        poems += item.get('contents', '')
+    return poems
+
+def main():
+    try:
+        data = load_file()
+        logging.info(data[0])
+        poems = merge_poems(json.loads(data))
+        logging.info('result ==> %s', stats_word.stats_text_cn(poems,100))
+    except Exception as e:
+        logging.exception(e)
+
+if __name__ == '__main__':
+    main()
+
+
diff --git a/exercises/1901100050/d09/mymodule/stats_word.py b/exercises/1901100050/d09/mymodule/stats_word.py
new file mode 100644
index 000000000..b9b2a02ed
--- /dev/null
+++ b/exercises/1901100050/d09/mymodule/stats_word.py
@@ -0,0 +1,34 @@
+from collections import Counter
+
+# 统计参数中每个英⽂单词出现的次数
+def stats_text_en(text, count):
+    elements = text.split()
+    words = []
+    symbols = ',.*-!'
+    for element in elements:
+        for symbol in symbols:
+            element = element.replace(symbol, '')
+        # 用 str 类型 的 isascii 方法判断是否是英文单词
+        if len(element) and element.isascii():
+            words.append(element)
+    return Counter(words).most_common(count)
+
+
+# 统计参数中每个中文汉字出现的次数
+def stats_text_cn(text, count):
+    cn_characters = []
+    for character in text:
+        # unicode 中 中文 字符的范围
+        if '\u4e00' <= character <= '\u9fff':
+            cn_characters.append(character)
+    return Counter(cn_characters).most_common(count)
+
+def stats_text(text, count=10):
+    '''
+    合并 英文词频 和 中文词频 的结果
+    '''
+    if not isinstance(text, str):
+        raise ValueError('参数必须是 str 类型,输入类型 %s' % type(text))
+    return stats_text_en(text, count) + stats_text_cn(text, count)
+
+
diff --git a/exercises/1901100050/d10/main.py b/exercises/1901100050/d10/main.py
new file mode 100644
index 000000000..b09e1addf
--- /dev/null
+++ b/exercises/1901100050/d10/main.py
@@ -0,0 +1,33 @@
+from mymodule import stats_word
+from os import path
+import json
+import re
+import logging
+
+logging.basicConfig(
+    format='file:%(filename)s|line:%(lineno)d|message:%(message)s', level=logging.DEBUG)
+
+def load_file():
+    file_path = path.join(path.dirname(path.abspath(__file__)),'tang300.json')
+    print('当前文件路径:', __file__, '\n读取文件路径:', file_path)
+
+    with open(file_path, 'r', encoding='utf-8') as f:
+        return f.read()
+
+def merge_poems(date):
+    poems = ''
+    for item in date:
+        poems += item.get('contents', '')
+    return poems
+
+def main():
+    try:
+        data = load_file()
+        logging.info(data[0])
+        poems = merge_poems(json.loads(data))
+        logging.info('result ==> %s', stats_word.stats_text_cn(poems,100))
+    except Exception as e:
+        logging.exception(e)
+
+if __name__ == '__main__':
+    main()
diff --git a/exercises/1901100050/d10/mymodule/stats_word.py b/exercises/1901100050/d10/mymodule/stats_word.py
new file mode 100644
index 000000000..acb568440
--- /dev/null
+++ b/exercises/1901100050/d10/mymodule/stats_word.py
@@ -0,0 +1,35 @@
+from collections import Counter
+import jieba
+
+# 统计参数中每个英⽂单词出现的次数
+def stats_text_en(text, count):
+    elements = text.split()
+    words = []
+    symbols = ',.*-!'
+    for element in elements:
+        for symbol in symbols:
+            element = element.replace(symbol, '')
+        # 用 str 类型 的 isascii 方法判断是否是英文单词
+        if len(element) and element.isascii():
+            words.append(element)
+    return Counter(words).most_common(count)
+
+
+# 统计参数中每个中文汉字出现的次数
+def stats_text_cn(text, count):
+    words = jieba.cut(text)
+    tmp = []
+    for i in words:
+        if len(i) > 1:
+            tmp.append(i)
+    return Counter(tmp).most_common(count)
+
+def stats_text(text, count=10):
+    '''
+    合并 英文词频 和 中文词频 的结果
+    '''
+    if not isinstance(text, str):
+        raise ValueError('参数必须是 str 类型,输入类型 %s' % type(text))
+    return stats_text_en(text, count) + stats_text_cn(text, count)
+
+