Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion exercises/1901100050/d08/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ def test_traceback():
def test_logger():
try:
stats_word.stats_text(1)
except exception as e:

except Exception as e:

# print('test_logger =>',e)
logger.exception(e)

Expand Down
35 changes: 35 additions & 0 deletions exercises/1901100050/d09/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from mymodule import stats_word
from os import path
import json
import re
import logging

# Configure logging at DEBUG level; every record is rendered with the
# source file name, the line number and the message.
logging.basicConfig(
    format='file:%(filename)s|line:%(lineno)d|message:%(message)s', level=logging.DEBUG)

def load_file():
    """Return the full text of ``tang300.json`` located beside this script.

    The path of this script and the resolved path of the data file are
    printed before reading, so the location being read is visible on the
    console.

    Returns:
        The file content decoded as UTF-8.
    """
    script_dir = path.dirname(path.abspath(__file__))
    json_path = path.join(script_dir, 'tang300.json')
    print('当前文件路径:', __file__, '\n读取文件路径:', json_path)

    with open(json_path, 'r', encoding='utf-8') as source:
        content = source.read()
    return content

def merge_poems(date):
    """Concatenate the ``contents`` text of every poem record.

    Args:
        date: Iterable of records exposing ``.get`` (e.g. dicts). A record
            without a ``contents`` key contributes an empty string. The
            parameter name is kept as-is for caller compatibility.

    Returns:
        One string holding all ``contents`` values joined in input order;
        an empty string when there are no records.
    """
    # str.join assembles the result in a single pass instead of re-copying
    # the accumulated string on every ``+=``.
    return ''.join(item.get('contents', '') for item in date)

def main():
    """Load the poem file, merge the poems and log the 100 top entries.

    The first character of the raw file text is logged, the text is parsed
    as JSON, the records are merged with ``merge_poems`` and the result of
    ``stats_word.stats_text_cn(..., 100)`` is logged. Any exception raised
    along the way is logged with its traceback instead of propagating.
    """
    try:
        raw_text = load_file()
        logging.info(raw_text[0])
        records = json.loads(raw_text)
        merged = merge_poems(records)
        top_entries = stats_word.stats_text_cn(merged, 100)
        logging.info('result ==> %s', top_entries)
    except Exception as err:
        logging.exception(err)

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


34 changes: 34 additions & 0 deletions exercises/1901100050/d09/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from collections import Counter

# Count how many times each English word occurs in the given text
def stats_text_en(text, count):
    """Count English words in *text* and return the most frequent ones.

    The text is split on whitespace, the characters ``,.*-!`` are removed
    from every piece, and each remaining piece that is non-empty and pure
    ASCII is counted as a word.

    Args:
        text: String to analyse.
        count: Number of entries to return; forwarded to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, occurrences)`` tuples, most frequent first.
    """
    # A single translate() pass deletes all symbols, replacing one
    # str.replace() call per symbol.
    strip_symbols = str.maketrans('', '', ',.*-!')
    cleaned = (element.translate(strip_symbols) for element in text.split())
    # ''.isascii() is True, so empty pieces must be rejected explicitly.
    words = [word for word in cleaned if word and word.isascii()]
    return Counter(words).most_common(count)


# 统计参数中每个中文汉字出现的次数
def stats_text_cn(text, count):
    """Count Chinese characters in *text* and return the most frequent ones.

    Args:
        text: String to analyse.
        count: Number of entries to return; forwarded to
            ``Counter.most_common``.

    Returns:
        A list of ``(character, occurrences)`` tuples, most frequent first.
    """
    # Only characters inside U+4E00..U+9FFF are treated as Chinese.
    frequencies = Counter(
        char for char in text if '\u4e00' <= char <= '\u9fff'
    )
    return frequencies.most_common(count)

def stats_text(text, count=None):
    """Return English word counts followed by Chinese character counts.

    Args:
        text: String to analyse.
        count: Number of top entries to take from each of the two
            statistics. ``None`` (the default) keeps every entry, so
            existing one-argument calls keep working.

    Returns:
        The list from ``stats_text_en`` concatenated with the list from
        ``stats_text_cn``.

    Raises:
        ValueError: If *text* is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError('参数必须是 str 类型,输入类型 %s' % type(text))
    # ``count`` used to be an undefined name here, so every valid call
    # failed with NameError; it is now an explicit optional parameter.
    return stats_text_en(text, count) + stats_text_cn(text, count)


33 changes: 33 additions & 0 deletions exercises/1901100050/d10/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from mymodule import stats_word
from os import path
import json
import re
import logging

# Configure logging at DEBUG level; every record is rendered with the
# source file name, the line number and the message.
logging.basicConfig(
    format='file:%(filename)s|line:%(lineno)d|message:%(message)s', level=logging.DEBUG)

def load_file():
    """Return the full text of ``tang300.json`` located beside this script.

    The path of this script and the resolved path of the data file are
    printed before reading, so the location being read is visible on the
    console.

    Returns:
        The file content decoded as UTF-8.
    """
    script_dir = path.dirname(path.abspath(__file__))
    json_path = path.join(script_dir, 'tang300.json')
    print('当前文件路径:', __file__, '\n读取文件路径:', json_path)

    with open(json_path, 'r', encoding='utf-8') as source:
        content = source.read()
    return content

def merge_poems(date):
    """Concatenate the ``contents`` text of every poem record.

    Args:
        date: Iterable of records exposing ``.get`` (e.g. dicts). A record
            without a ``contents`` key contributes an empty string. The
            parameter name is kept as-is for caller compatibility.

    Returns:
        One string holding all ``contents`` values joined in input order;
        an empty string when there are no records.
    """
    # str.join assembles the result in a single pass instead of re-copying
    # the accumulated string on every ``+=``.
    return ''.join(item.get('contents', '') for item in date)

def main():
    """Load the poem file, merge the poems and log the 100 top entries.

    The first character of the raw file text is logged, the text is parsed
    as JSON, the records are merged with ``merge_poems`` and the result of
    ``stats_word.stats_text_cn(..., 100)`` is logged. Any exception raised
    along the way is logged with its traceback instead of propagating.
    """
    try:
        raw_text = load_file()
        logging.info(raw_text[0])
        records = json.loads(raw_text)
        merged = merge_poems(records)
        top_entries = stats_word.stats_text_cn(merged, 100)
        logging.info('result ==> %s', top_entries)
    except Exception as err:
        logging.exception(err)

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
35 changes: 35 additions & 0 deletions exercises/1901100050/d10/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from collections import Counter
import jieba

# Count how many times each English word occurs in the given text
def stats_text_en(text, count):
    """Count English words in *text* and return the most frequent ones.

    The text is split on whitespace, the characters ``,.*-!`` are removed
    from every piece, and each remaining piece that is non-empty and pure
    ASCII is counted as a word.

    Args:
        text: String to analyse.
        count: Number of entries to return; forwarded to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, occurrences)`` tuples, most frequent first.
    """
    # A single translate() pass deletes all symbols, replacing one
    # str.replace() call per symbol.
    strip_symbols = str.maketrans('', '', ',.*-!')
    cleaned = (element.translate(strip_symbols) for element in text.split())
    # ''.isascii() is True, so empty pieces must be rejected explicitly.
    words = [word for word in cleaned if word and word.isascii()]
    return Counter(words).most_common(count)


# Count how many times each jieba-segmented word (longer than one character) occurs in the given text
def stats_text_cn(text, count):
    """Count segmented words in *text* and return the most frequent ones.

    The text is segmented with ``jieba.cut``; only tokens longer than one
    character are counted. No further filtering by script is applied.

    Args:
        text: String to analyse.
        count: Number of entries to return; forwarded to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, occurrences)`` tuples, most frequent first.
    """
    frequencies = Counter(
        token for token in jieba.cut(text) if len(token) > 1
    )
    return frequencies.most_common(count)

def stats_text(text, count=None):
    """Return English word counts followed by Chinese word counts.

    Args:
        text: String to analyse.
        count: Number of top entries to take from each of the two
            statistics. ``None`` (the default) keeps every entry, so
            existing one-argument calls keep working.

    Returns:
        The list from ``stats_text_en`` concatenated with the list from
        ``stats_text_cn``.

    Raises:
        ValueError: If *text* is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError('参数必须是 str 类型,输入类型 %s' % type(text))
    # ``count`` used to be an undefined name here, so every valid call
    # failed with NameError; it is now an explicit optional parameter.
    return stats_text_en(text, count) + stats_text_cn(text, count)