diff --git a/exercises/1901100362/07/main.py b/exercises/1901100362/07/main.py new file mode 100644 index 000000000..706348fc4 --- /dev/null +++ b/exercises/1901100362/07/main.py @@ -0,0 +1,68 @@ +from mymodule import stats_word + +text = ''' +愚公移⼭山 +太⾏行行,王屋⼆二⼭山的北北⾯面,住了了⼀一個九⼗十歲的⽼老老翁,名叫愚公。⼆二⼭山佔地廣闊,擋住去路路,使他 +和家⼈人往來來極為不不便便。 +⼀一天,愚公召集家⼈人說:「讓我們各盡其⼒力力,剷平⼆二⼭山,開條道路路,直通豫州,你們認為怎 +樣?」 +⼤大家都異異⼝口同聲贊成,只有他的妻⼦子表示懷疑,並說:「你連開鑿⼀一個⼩小丘的⼒力力量量都沒有,怎 +可能剷平太⾏行行、王屋⼆二⼭山呢?況且,鑿出的⼟土⽯石⼜又丟到哪裏去呢?」 +⼤大家都熱烈烈地說:「把⼟土⽯石丟進渤海海裏。」 +於是愚公就和兒孫,⼀一起開挖⼟土,把⼟土⽯石搬運到渤海海去。 +愚公的鄰居是個寡婦,有個兒⼦子⼋八歲也興致勃勃地⾛走來來幫忙。 +寒來來暑往,他們要⼀一年年才能往返渤海海⼀一次。 +住在⿈黃河河畔的智叟,看⾒見見他們這樣⾟辛苦,取笑愚公說:「你不不是很愚蠢嗎?你已⼀一把年年紀 +了了,就是⽤用盡你的氣⼒力力,也不不能挖去⼭山的⼀一⻆角呢?」 +愚公歎息道:「你有這樣的成⾒見見,是不不會明⽩白的。你⽐比那寡婦的⼩小兒⼦子還不不如呢!就算我死 +了了,還有我的兒⼦子,我的孫⼦子,我的曾孫⼦子,他們⼀一直傳下去。⽽而這⼆二⼭山是不不會加⼤大的,總有 +⼀一天,我們會把它們剷平。」 +智叟聽了了,無話可說: +⼆二⼭山的守護神被愚公的堅毅精神嚇倒,便便把此事奏知天帝。天帝佩服愚公的精神,就命兩位⼤大 +⼒力力神揹⾛走⼆二⼭山。 +How The Foolish Old Man Moved Mountains +Yugong was a ninety-year-old man who lived at the north of two high +mountains, Mount Taixing and Mount Wangwu. +Stretching over a wide expanse of land, the mountains blocked + yugong’s way making it inconvenient for him and his family to get + around. +One day yugong gathered his family together and said,”Let’s do our + best to level these two mountains. We shall open a road that leads + to Yuzhou. What do you think?” +All but his wife agreed with him. +“You don’t have the strength to cut even a small mound,” muttered + his wife. “How on earth do you suppose you can level Mount Taixin + and Mount Wanwu? Moreover, where will all the earth and rubble go?” + “Dump them into the Sea of Bohai!” said everyone. +So Yugong, his sons, and his grandsons started to break up rocks and + remove the earth. They transported the earth and rubble to the Sea + of Bohai. +Now Yugong’s neighbour was a widow who had an only child eight years + old. Evening the young boy offered his help eagerly. + Summer went by and winter came. It took Yugong and his crew a full + year to travel back and forth once. 
+ On the bank of the Yellow River dwelled an old man much respected +for his wisdom. When he saw their back-breaking labour, he ridiculed +Yugong saying,”Aren’t you foolish, my friend? You are very old now, +and with whatever remains of your waning strength, you won’t be able +to remove even a corner of the mountain.” +Yugong uttered a sigh and said,”A biased person like you will never + understand. You can’t even compare with the widow’s little boy!” + “Even if I were dead, there will still be my children, my + grandchildren, my great grandchildren, my great great grandchildren. +They descendants will go on forever. But these mountains will not + grow any taler. We shall level them one day!” he declared with + confidence. +The wise old man was totally silenced. +When the guardian gods of the mountains saw how determined Yugong + and his crew were, they were struck with fear and reported the + incident to the Emperor of Heavens. +Filled with admiration for Yugong, the Emperor of Heavens ordered + two mighty gods to carry the mountains away. 
import string
from collections import Counter

# Punctuation that separates tokens. The opening curly quote is included so
# that tokens such as “You are not mistaken for Chinese text just because
# their first character is not an ASCII letter.
_SEPARATORS = {ord(ch): ' ' for ch in ',。:「」,?.”、-!“'}


def stats_text_en(words):
    """Count how often each English word occurs.

    Args:
        words: Iterable of already-tokenised words.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent; words with equal counts keep first-seen order.
    """
    # Counter makes one pass over the input instead of calling list.count()
    # once per distinct word.
    return Counter(words).most_common()


def stats_text_cn(words):
    """Count how often each character occurs in the given text fragments.

    Args:
        words: Iterable of strings; the fragments are concatenated and every
            character of the result is counted.

    Returns:
        A list of ``(character, count)`` tuples ordered from most to least
        frequent; characters with equal counts keep first-seen order.
    """
    return Counter(''.join(words)).most_common()


def stats_text(text):
    """Count Chinese characters and English words in a mixed-language text.

    Separator punctuation is replaced by spaces, the text is split on
    whitespace, and each token is classified by its first character: tokens
    starting with an ASCII letter are English words, everything else is
    treated as Chinese text.

    Args:
        text: The text to analyse.

    Returns:
        The Chinese character counts followed by the English word counts,
        each as ``(item, count)`` tuples sorted by descending frequency.
    """
    en_text = []
    cn_text = []
    # str.split() never yields empty tokens, so token[0] is always safe.
    for token in text.translate(_SEPARATORS).split():
        if token[0] in string.ascii_letters:
            en_text.append(token)
        else:
            cn_text.append(token)

    return stats_text_cn(cn_text) + stats_text_en(en_text)
+住在⿈黃河河畔的智叟,看⾒見見他們這樣⾟辛苦,取笑愚公說:「你不不是很愚蠢嗎?你已⼀一把年年紀 +了了,就是⽤用盡你的氣⼒力力,也不不能挖去⼭山的⼀一⻆角呢?」 +愚公歎息道:「你有這樣的成⾒見見,是不不會明⽩白的。你⽐比那寡婦的⼩小兒⼦子還不不如呢!就算我死 +了了,還有我的兒⼦子,我的孫⼦子,我的曾孫⼦子,他們⼀一直傳下去。⽽而這⼆二⼭山是不不會加⼤大的,總有 +⼀一天,我們會把它們剷平。」 +智叟聽了了,無話可說: +⼆二⼭山的守護神被愚公的堅毅精神嚇倒,便便把此事奏知天帝。天帝佩服愚公的精神,就命兩位⼤大 +⼒力力神揹⾛走⼆二⼭山。 +How The Foolish Old Man Moved Mountains +Yugong was a ninety-year-old man who lived at the north of two high +mountains, Mount Taixing and Mount Wangwu. +Stretching over a wide expanse of land, the mountains blocked + yugong’s way making it inconvenient for him and his family to get + around. +One day yugong gathered his family together and said,”Let’s do our + best to level these two mountains. We shall open a road that leads + to Yuzhou. What do you think?” +All but his wife agreed with him. +“You don’t have the strength to cut even a small mound,” muttered + his wife. “How on earth do you suppose you can level Mount Taixin + and Mount Wanwu? Moreover, where will all the earth and rubble go?” + “Dump them into the Sea of Bohai!” said everyone. +So Yugong, his sons, and his grandsons started to break up rocks and + remove the earth. They transported the earth and rubble to the Sea + of Bohai. +Now Yugong’s neighbour was a widow who had an only child eight years + old. Evening the young boy offered his help eagerly. + Summer went by and winter came. It took Yugong and his crew a full + year to travel back and forth once. + On the bank of the Yellow River dwelled an old man much respected +for his wisdom. When he saw their back-breaking labour, he ridiculed +Yugong saying,”Aren’t you foolish, my friend? You are very old now, +and with whatever remains of your waning strength, you won’t be able +to remove even a corner of the mountain.” +Yugong uttered a sigh and said,”A biased person like you will never + understand. 
import string
from collections import Counter

# Punctuation that separates tokens. The opening curly quote is included so
# that tokens such as “You are not mistaken for Chinese text just because
# their first character is not an ASCII letter.
_SEPARATORS = {ord(ch): ' ' for ch in ',。:「」,?.”、-!“'}


def stats_text_en(words):
    """Count how often each English word occurs in a string.

    Args:
        words: Whitespace-separated words.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent; words with equal counts keep first-seen order.

    Raises:
        ValueError: If ``words`` is not a string.
    """
    if not isinstance(words, str):
        raise ValueError("参数必须是字符串 %s" % type(words))

    # Split before counting: iterating the string itself would count single
    # characters rather than words.
    return Counter(words.split()).most_common()


def stats_text_cn(words):
    """Count how often each non-whitespace character occurs in a string.

    Args:
        words: The text whose characters are counted.

    Returns:
        A list of ``(character, count)`` tuples ordered from most to least
        frequent; characters with equal counts keep first-seen order.

    Raises:
        ValueError: If ``words`` is not a string.
    """
    if not isinstance(words, str):
        # Report the argument actually received; formatting any other name
        # here would fail with NameError before the ValueError is raised.
        raise ValueError("参数必须是字符串 %s" % type(words))

    return Counter(ch for ch in words if not ch.isspace()).most_common()


def stats_text(text):
    """Count Chinese characters and English words in a mixed-language text.

    Separator punctuation is replaced by spaces, the text is split on
    whitespace, and each token is classified by its first character: tokens
    starting with an ASCII letter are English words, everything else is
    treated as Chinese text.

    Args:
        text: The text to analyse.

    Returns:
        The Chinese character counts followed by the English word counts,
        each as ``(item, count)`` tuples sorted by descending frequency.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("请输入字符串!")

    en_text = []
    cn_text = []
    # str.split() never yields empty tokens, so token[0] is always safe.
    for token in text.translate(_SEPARATORS).split():
        if token[0] in string.ascii_letters:
            en_text.append(token)
        else:
            cn_text.append(token)

    # The helpers only accept strings, so hand them joined text rather than
    # the token lists themselves.
    return stats_text_cn(''.join(cn_text)) + stats_text_en(' '.join(en_text))
def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese characters in ``text``.

    Only characters in the range U+4E00..U+9FFF are counted; everything else
    is ignored.

    Args:
        text: The text to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(character, occurrences)`` tuples ordered from most to
        least frequent; ties keep first-seen order.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        # %s, not %d: formatting a type object with %d raises TypeError and
        # would mask the intended ValueError.
        raise ValueError("count参数必须是int %s" % type(count))

    cn_characters = [ch for ch in text if '\u4e00' <= ch <= '\u9fff']

    # most_common() already returns the entries sorted by descending count,
    # so no further sorting is needed.
    return Counter(cn_characters).most_common(count)
def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    The punctuation characters ``,.*-!`` are treated as word separators, the
    text is split on whitespace, and only pure-ASCII tokens are counted.

    Args:
        text: The text to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples ordered from most to least
        frequent; ties keep first-seen order.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("text参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        # %s, not %d: formatting a type object with %d raises TypeError and
        # would mask the intended ValueError.
        raise ValueError("count参数必须是int %s" % type(count))

    symbols = ',.*-!'
    # Strings are immutable, so the cleaned text has to be captured; calling
    # str.replace() and dropping its result leaves the punctuation in place.
    cleaned = text.translate({ord(symbol): ' ' for symbol in symbols})
    words = [word for word in cleaned.split() if word.isascii()]

    # most_common() already returns the entries sorted by descending count.
    return Counter(words).most_common(count)
def send_mail(content):
    """Prompt for SMTP credentials and e-mail ``content`` via smtp.163.com.

    The account name and authorisation code are read interactively. The
    recipient, the CC address and the subject are fixed.

    Args:
        content: The message body, passed to yagmail as ``contents``.
    """
    username = input('请输入邮件账号:')
    # Read the authorisation code without echoing it, so the secret does not
    # end up on screen or in terminal scrollback.
    passwd = getpass.getpass('请输入授权码:')

    # Author's note: QQ mail or a corporate mailbox that requires a secure
    # connection additionally needs smtp_ssl=True here.
    mail = yagmail.SMTP(
        user=username,
        password=passwd,
        host='smtp.163.com',
    )
    mail.send(
        to='pythoncamp@163.com',  # pass a list for several recipients
        cc='13910255158@163.com',  # carbon copy
        subject='张小龙',  # mail subject
        contents=content,  # mail body
    )
def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Characters outside U+4E00..U+9FFF are dropped, the remaining text is
    segmented with ``jieba.cut(..., cut_all=False)``, and only segments
    longer than two characters are counted.

    Args:
        text: The text to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples ordered from most to least
        frequent.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        # %s, not %d: formatting a type object with %d raises TypeError and
        # would mask the intended ValueError.
        raise ValueError("count参数必须是int %s" % type(count))

    cn_text = ''.join(ch for ch in text if '\u4e00' <= ch <= '\u9fff')
    # NOTE(review): `> 2` also discards two-character words; confirm that
    # this is intended rather than `>= 2`.
    seg_list = [word for word in jieba.cut(cn_text, cut_all=False) if len(word) > 2]

    return Counter(seg_list).most_common(count)
def stats_text(text, count):
    """Return the top Chinese words followed by the top English words.

    Args:
        text: The text to analyse.
        count: Maximum number of entries taken from each language.

    Returns:
        The result of ``stats_text_cn(text, count)`` concatenated with the
        result of ``stats_text_en(text, count)``.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        # %s, not %d: formatting a type object with %d raises TypeError and
        # would mask the intended ValueError.
        raise ValueError("count参数必须是int %s" % type(count))

    return stats_text_cn(text, count) + stats_text_en(text, count)
def merge_poems(data):
    """Concatenate the ``contents`` field of every poem record.

    Args:
        data: Iterable of mappings (the parsed JSON records); a record
            without a ``contents`` key contributes an empty string.

    Returns:
        All ``contents`` values joined into a single string, in input order.
    """
    # str.join builds the result in one pass instead of re-copying the
    # accumulated string on every `+=`.
    return ''.join(item.get('contents', '') for item in data)
def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    The punctuation characters ``,.*-!`` are treated as word separators, the
    text is split on whitespace, and only pure-ASCII tokens are counted.

    Args:
        text: The text to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples ordered from most to least
        frequent; ties keep first-seen order.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("text参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        # %s, not %d: formatting a type object with %d raises TypeError and
        # would mask the intended ValueError.
        raise ValueError("count参数必须是int %s" % type(count))

    symbols = ',.*-!'
    # Strings are immutable, so the cleaned text has to be captured; calling
    # str.replace() and dropping its result leaves the punctuation in place.
    cleaned = text.translate({ord(symbol): ' ' for symbol in symbols})
    words = [word for word in cleaned.split() if word.isascii()]

    return Counter(words).most_common(count)


def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Characters outside U+4E00..U+9FFF are dropped, the remaining text is
    segmented with ``jieba.cut(..., cut_all=False)``, and only segments
    longer than two characters are counted.

    Args:
        text: The text to analyse.
        count: Maximum number of entries to return.

    Returns:
        A list of ``(word, occurrences)`` tuples ordered from most to least
        frequent.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        raise ValueError("count参数必须是int %s" % type(count))

    cn_text = ''.join(ch for ch in text if '\u4e00' <= ch <= '\u9fff')
    # NOTE(review): `> 2` also discards two-character words; confirm that
    # this is intended rather than `>= 2`.
    seg_list = [word for word in jieba.cut(cn_text, cut_all=False) if len(word) > 2]

    return Counter(seg_list).most_common(count)


def stats_text(text, count):
    """Return the top Chinese words followed by the top English words.

    Args:
        text: The text to analyse.
        count: Maximum number of entries taken from each language.

    Returns:
        The Chinese result list concatenated with the English result list.

    Raises:
        ValueError: If ``text`` is not a string or ``count`` is not an int.
    """
    if not isinstance(text, str):
        raise ValueError("参数必须是字符串 %s" % type(text))
    if not isinstance(count, int):
        raise ValueError("count参数必须是int %s" % type(count))

    return stats_text_cn(text, count) + stats_text_en(text, count)