Skip to content
4 changes: 3 additions & 1 deletion exercises/PanChuang2019/D08/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ def test_logger():


# Script entry point: run the two demo routines in order.
if __name__ == "__main__":
    # The stats_word.stats_text(1) call is deliberately left disabled.
    test_traceback()
    test_logger()




40 changes: 40 additions & 0 deletions exercises/PanChuang2019/D09/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from mymodule import stats_word
from os import path
import json
import re
import logging


# Configure the root logger: DEBUG threshold, each record tagged with the
# source file name and line number ahead of the message.
logging.basicConfig(
    level=logging.DEBUG,
    format='file:%(filename)s[line:%(lineno)d]message:%(message)s',
)


def load_file():
    """Read ``tang300.json`` from this script's directory and return its text.

    The path of this script and the resolved data path are printed first.

    Returns:
        The full file content as a UTF-8 decoded string (not parsed).
    """
    script_dir = path.dirname(path.abspath(__file__))
    json_path = path.join(script_dir, 'tang300.json')
    print('当前文件路径:', __file__, '\n读取文件路径:', json_path)
    with open(json_path, mode='r', encoding='utf-8') as source:
        content = source.read()
    return content

def merge_poems(data):
    """Concatenate the ``'contents'`` text of every record in ``data``.

    Args:
        data: Iterable of mapping objects (anything with ``.get``). Records
            lacking a ``'contents'`` key contribute an empty string.

    Returns:
        One string made of all ``'contents'`` values in iteration order.
    """
    # str.join builds the result in a single pass; repeated ``+=`` on a
    # string can degrade to quadratic time on large inputs.
    return ''.join(item.get('contents', '') for item in data)

def main():
    """Load the poem file, merge its text and log the top-100 frequency result.

    Every exception raised along the way is logged with its traceback
    instead of being propagated.
    """
    try:
        raw_json = load_file()
        # Logs the first character of the raw (still unparsed) file text.
        logging.info(raw_json[0])
        all_text = merge_poems(json.loads(raw_json))
        top_entries = stats_word.stats_text_cn(all_text, 100)
        logging.info('result ==> %s', top_entries)
    except Exception as error:
        logging.exception(error)


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()





27 changes: 27 additions & 0 deletions exercises/PanChuang2019/D09/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from collections import Counter

def stats_text_en(text, count):
    """Return the ``count`` most frequent ASCII words in ``text``.

    The text is split on whitespace, the characters ``,.*-!`` are removed
    from each token, and tokens that end up empty or contain any non-ASCII
    character are ignored.

    Args:
        text: String to analyse.
        count: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.
    """
    # A single translation table deletes every listed symbol in one pass,
    # replacing a chain of per-symbol ``str.replace`` calls.
    symbol_remover = str.maketrans('', '', ',.*-!')
    cleaned = (token.translate(symbol_remover) for token in text.split())
    return Counter(
        word for word in cleaned if word and word.isascii()
    ).most_common(count)

def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese characters in ``text``.

    Only characters in the range U+4E00..U+9FFF are counted; everything
    else (ASCII, punctuation, whitespace) is ignored.

    Args:
        text: String to analyse.
        count: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of ``(character, frequency)`` tuples, most frequent first.
    """
    # Feed the filtered characters straight into Counter instead of first
    # collecting them in an intermediate list.
    return Counter(
        ch for ch in text if '\u4e00' <= ch <= '\u9fff'
    ).most_common(count)

def stats_text(text, count):
    """Combine the Chinese and the English frequency results for ``text``.

    Args:
        text: String to analyse.
        count: Passed unchanged to both ``stats_text_cn`` and
            ``stats_text_en``.

    Returns:
        The ``stats_text_cn`` result followed by the ``stats_text_en`` result.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if isinstance(text, str):
        cn_part = stats_text_cn(text, count)
        en_part = stats_text_en(text, count)
        return cn_part + en_part
    raise ValueError('参数必须是str类型,输入类型%s' % type(text))


2,235 changes: 2,235 additions & 0 deletions exercises/PanChuang2019/D09/tang300.json

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions exercises/PanChuang2019/D10/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from mymodule import stats_word
from os import path
import json
import re
import logging


# Configure the root logger: DEBUG threshold, each record tagged with the
# source file name and line number ahead of the message.
logging.basicConfig(
    level=logging.DEBUG,
    format='file:%(filename)s[line:%(lineno)d]message:%(message)s',
)


def load_file():
    """Read ``tang300.json`` from this script's directory and return its text.

    The path of this script and the resolved data path are printed first.

    Returns:
        The full file content as a UTF-8 decoded string (not parsed).
    """
    script_dir = path.dirname(path.abspath(__file__))
    json_path = path.join(script_dir, 'tang300.json')
    print('当前文件路径:', __file__, '\n读取文件路径:', json_path)
    with open(json_path, mode='r', encoding='utf-8') as source:
        content = source.read()
    return content

def merge_poems(data):
    """Concatenate the ``'contents'`` text of every record in ``data``.

    Args:
        data: Iterable of mapping objects (anything with ``.get``). Records
            lacking a ``'contents'`` key contribute an empty string.

    Returns:
        One string made of all ``'contents'`` values in iteration order.
    """
    # str.join builds the result in a single pass; repeated ``+=`` on a
    # string can degrade to quadratic time on large inputs.
    return ''.join(item.get('contents', '') for item in data)

def main():
    """Load the poem file, merge its text and log the top-20 frequency result.

    Every exception raised along the way is logged with its traceback
    instead of being propagated.
    """
    try:
        raw_json = load_file()
        all_text = merge_poems(json.loads(raw_json))
        top_entries = stats_word.stats_text_cn(all_text, 20)
        logging.info('result ==> %s', top_entries)
    except Exception as error:
        logging.exception(error)


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()





29 changes: 29 additions & 0 deletions exercises/PanChuang2019/D10/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from collections import Counter
import jieba

def stats_text_en(text, count):
    """Return the ``count`` most frequent ASCII words in ``text``.

    The text is split on whitespace, the characters ``,.*-!`` are removed
    from each token, and tokens that end up empty or contain any non-ASCII
    character are ignored.

    Args:
        text: String to analyse.
        count: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.
    """
    # A single translation table deletes every listed symbol in one pass,
    # replacing a chain of per-symbol ``str.replace`` calls.
    symbol_remover = str.maketrans('', '', ',.*-!')
    cleaned = (token.translate(symbol_remover) for token in text.split())
    return Counter(
        word for word in cleaned if word and word.isascii()
    ).most_common(count)

def stats_text_cn(text, count):
    """Return the ``count`` most frequent multi-character segments of ``text``.

    ``text`` is segmented with ``jieba.cut``; only segments of length two
    or more are counted, so single characters are discarded.

    Args:
        text: String to analyse.
        count: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of ``(segment, frequency)`` tuples, most frequent first.
    """
    # Feed the filtered segments straight into Counter instead of first
    # collecting them in an intermediate list.
    return Counter(
        segment for segment in jieba.cut(text) if len(segment) >= 2
    ).most_common(count)

def stats_text(text, count):
    """Combine the Chinese and the English frequency results for ``text``.

    Args:
        text: String to analyse.
        count: Passed unchanged to both ``stats_text_cn`` and
            ``stats_text_en``.

    Returns:
        The ``stats_text_cn`` result followed by the ``stats_text_en`` result.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if isinstance(text, str):
        cn_part = stats_text_cn(text, count)
        en_part = stats_text_en(text, count)
        return cn_part + en_part
    raise ValueError('参数必须是str类型,输入类型%s' % type(text))


Loading