From 35b447693fe4ed3ed4eca4f9fb1c81171935876a Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Mon, 2 Sep 2019 14:51:38 +0800 Subject: [PATCH 01/11] day07 --- exercises/1901100362/07/main.py | 68 +++++++++++++++++ .../1901100362/07/mymodule/stats_word.py | 74 +++++++++++++++++++ exercises/1901100362/1001S02E05_string.py | 2 +- 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 exercises/1901100362/07/main.py create mode 100644 exercises/1901100362/07/mymodule/stats_word.py diff --git a/exercises/1901100362/07/main.py b/exercises/1901100362/07/main.py new file mode 100644 index 000000000..f6600fe95 --- /dev/null +++ b/exercises/1901100362/07/main.py @@ -0,0 +1,68 @@ +from mymodule import stats_word + +text = ''' +愚公移⼭山 +太⾏行行,王屋⼆二⼭山的北北⾯面,住了了⼀一個九⼗十歲的⽼老老翁,名叫愚公。⼆二⼭山佔地廣闊,擋住去路路,使他 +和家⼈人往來來極為不不便便。 +⼀一天,愚公召集家⼈人說:「讓我們各盡其⼒力力,剷平⼆二⼭山,開條道路路,直通豫州,你們認為怎 +樣?」 +⼤大家都異異⼝口同聲贊成,只有他的妻⼦子表示懷疑,並說:「你連開鑿⼀一個⼩小丘的⼒力力量量都沒有,怎 +可能剷平太⾏行行、王屋⼆二⼭山呢?況且,鑿出的⼟土⽯石⼜又丟到哪裏去呢?」 +⼤大家都熱烈烈地說:「把⼟土⽯石丟進渤海海裏。」 +於是愚公就和兒孫,⼀一起開挖⼟土,把⼟土⽯石搬運到渤海海去。 +愚公的鄰居是個寡婦,有個兒⼦子⼋八歲也興致勃勃地⾛走來來幫忙。 +寒來來暑往,他們要⼀一年年才能往返渤海海⼀一次。 +住在⿈黃河河畔的智叟,看⾒見見他們這樣⾟辛苦,取笑愚公說:「你不不是很愚蠢嗎?你已⼀一把年年紀 +了了,就是⽤用盡你的氣⼒力力,也不不能挖去⼭山的⼀一⻆角呢?」 +愚公歎息道:「你有這樣的成⾒見見,是不不會明⽩白的。你⽐比那寡婦的⼩小兒⼦子還不不如呢!就算我死 +了了,還有我的兒⼦子,我的孫⼦子,我的曾孫⼦子,他們⼀一直傳下去。⽽而這⼆二⼭山是不不會加⼤大的,總有 +⼀一天,我們會把它們剷平。」 +智叟聽了了,無話可說: +⼆二⼭山的守護神被愚公的堅毅精神嚇倒,便便把此事奏知天帝。天帝佩服愚公的精神,就命兩位⼤大 +⼒力力神揹⾛走⼆二⼭山。 +How The Foolish Old Man Moved Mountains +Yugong was a ninety-year-old man who lived at the north of two high +mountains, Mount Taixing and Mount Wangwu. +Stretching over a wide expanse of land, the mountains blocked + yugong’s way making it inconvenient for him and his family to get + around. +One day yugong gathered his family together and said,”Let’s do our + best to level these two mountains. We shall open a road that leads + to Yuzhou. What do you think?” +All but his wife agreed with him. +“You don’t have the strength to cut even a small mound,” muttered + his wife. “How on earth do you suppose you can level Mount Taixin + and Mount Wanwu? Moreover, where will all the earth and rubble go?” + “Dump them into the Sea of Bohai!” said everyone. +So Yugong, his sons, and his grandsons started to break up rocks and + remove the earth. They transported the earth and rubble to the Sea + of Bohai. +Now Yugong’s neighbour was a widow who had an only child eight years + old. Evening the young boy offered his help eagerly. + Summer went by and winter came. It took Yugong and his crew a full + year to travel back and forth once. + On the bank of the Yellow River dwelled an old man much respected +for his wisdom. When he saw their back-breaking labour, he ridiculed +Yugong saying,”Aren’t you foolish, my friend? You are very old now, +and with whatever remains of your waning strength, you won’t be able +to remove even a corner of the mountain.” +Yugong uttered a sigh and said,”A biased person like you will never + understand. You can’t even compare with the widow’s little boy!” + “Even if I were dead, there will still be my children, my + grandchildren, my great grandchildren, my great great grandchildren. +They descendants will go on forever. But these mountains will not + grow any taler. We shall level them one day!” he declared with + confidence. +The wise old man was totally silenced. +When the guardian gods of the mountains saw how determined Yugong + and his crew were, they were struck with fear and reported the + incident to the Emperor of Heavens. +Filled with admiration for Yugong, the Emperor of Heavens ordered + two mighty gods to carry the mountains away. +''' + + +if __name__ == '__main__': + + result = stats_word.stats_text(text) + print(result) \ No newline at end of file diff --git a/exercises/1901100362/07/mymodule/stats_word.py b/exercises/1901100362/07/mymodule/stats_word.py new file mode 100644 index 000000000..af96e9a97 --- /dev/null +++ b/exercises/1901100362/07/mymodule/stats_word.py @@ -0,0 +1,74 @@ +#统计参数中每个英文单词出现的次数 +def stats_text_en(text): + elements = text.split() + + words = [] + symbols = ',.“-*!' + + for element in elements: + for symbol in symbols: + #replace返回新的字符串,因此必须要新的字符串 + element = element.replace(symbol,'') + #将非空字符添加到列表中 + if len(element) : + words.append(element) + + #初始化一个counter字典,用来存放单词出现的频次 + counter = {} + + #去重,减少迭代次数 + word_set = set(words) + + for word in word_set: + counter[word] = words.count(word) + + #2.从小到大排序输出 + + return sorted(counter.items(), key = lambda x:x[1],reverse=True) + +#统计参数中每个中文字符出现的次数 +def stats_text_cn(text): + cn_characters = [] + + for character in text: + #unicode中中文字符的范围 + if '\u4e00' <= character <= '\u9fff': + cn_characters.append(character) + + #print('中文字符列表==>',cn_characters) + + counter = {} + + #减少迭代次数 + cn_character_set = set(cn_characters) + for character in cn_character_set: + counter[character] = cn_characters.count(character) + return sorted(counter.items(),key = lambda x:x[1],reverse=True) + +def stats_text(text): + cn_text = text[:text.rfind('\u3002')+1] + en_text = text[text.rfind('\u3002')+1:] + #print('resule is ==>',**stats_text_cn(cn_text),**stats_text_en(en_text)) + cn_result = stats_text_cn(cn_text) + en_result = stats_text_en(en_text) +# print('cn_result==>',cn_result) +# print('en_result==>',en_result) + merged_result = cn_result+en_result + return (merged_result) + + + + + + + + + + + + + + + + + diff --git a/exercises/1901100362/1001S02E05_string.py b/exercises/1901100362/1001S02E05_string.py index 731308d38..9f7dc2e8b 100644 --- a/exercises/1901100362/1001S02E05_string.py +++ b/exercises/1901100362/1001S02E05_string.py @@ -22,7 +22,7 @@ #把better替换成worse t1 = text.replace('better','worse') -print(t1) +print("将所有better替换成worse==>",t1) print("-------------------------------------------------------------------------------") #删除字符串中的带ea的单词 From 58829a1c2c310e046716d8bf301a9a7728f1d23c Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Wed, 4 Sep 2019 14:25:27 +0800 Subject: [PATCH 02/11] update --- exercises/1901100362/07/main.py | 4 +- .../1901100362/07/mymodule/stats_word.py | 48 +++++++++++-------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/exercises/1901100362/07/main.py b/exercises/1901100362/07/main.py index f6600fe95..706348fc4 100644 --- a/exercises/1901100362/07/main.py +++ b/exercises/1901100362/07/main.py @@ -64,5 +64,5 @@ if __name__ == '__main__': - result = stats_word.stats_text(text) - print(result) \ No newline at end of file + result = stats_word.stats_text(text) + print(result) \ No newline at end of file diff --git a/exercises/1901100362/07/mymodule/stats_word.py b/exercises/1901100362/07/mymodule/stats_word.py index af96e9a97..c2888ecf5 100644 --- a/exercises/1901100362/07/mymodule/stats_word.py +++ b/exercises/1901100362/07/mymodule/stats_word.py @@ -1,6 +1,11 @@ #统计参数中每个英文单词出现的次数 -def stats_text_en(text): - elements = text.split() +import string + + +def stats_text_en(words): + + """ + elements = l_text.split() words = [] symbols = ',.“-*!' @@ -12,7 +17,7 @@ def stats_text_en(text): #将非空字符添加到列表中 if len(element) : words.append(element) - + """ #初始化一个counter字典,用来存放单词出现的频次 counter = {} @@ -27,32 +32,33 @@ def stats_text_en(text): return sorted(counter.items(), key = lambda x:x[1],reverse=True) #统计参数中每个中文字符出现的次数 -def stats_text_cn(text): - cn_characters = [] - - for character in text: - #unicode中中文字符的范围 - if '\u4e00' <= character <= '\u9fff': - cn_characters.append(character) - - #print('中文字符列表==>',cn_characters) +def stats_text_cn(words): + str_words = ''.join(words) + set_words = set(str_words) counter = {} - #减少迭代次数 - cn_character_set = set(cn_characters) - for character in cn_character_set: - counter[character] = cn_characters.count(character) + for ch in set_words: + counter[ch] = str_words.count(ch) return sorted(counter.items(),key = lambda x:x[1],reverse=True) def stats_text(text): - cn_text = text[:text.rfind('\u3002')+1] - en_text = text[text.rfind('\u3002')+1:] - #print('resule is ==>',**stats_text_cn(cn_text),**stats_text_en(en_text)) + + symbols = ',。:「」,?.”、!' + for symbol in symbols: + text = text.replace(symbol,' ') + l1 = text.split() + en_text = [] + cn_text = [] + for i in l1: + if len(i)>0: + if i[0] in string.ascii_letters: + en_text.append(i) + else: + cn_text.append(i) + cn_result = stats_text_cn(cn_text) en_result = stats_text_en(en_text) -# print('cn_result==>',cn_result) -# print('en_result==>',en_result) merged_result = cn_result+en_result return (merged_result) From 7b348b916575e5ecab4676f44e13b1b4caf3e7fc Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Wed, 4 Sep 2019 14:41:57 +0800 Subject: [PATCH 03/11] Update stats_word.py --- exercises/1901100362/07/mymodule/stats_word.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercises/1901100362/07/mymodule/stats_word.py b/exercises/1901100362/07/mymodule/stats_word.py index c2888ecf5..cdecbfa7e 100644 --- a/exercises/1901100362/07/mymodule/stats_word.py +++ b/exercises/1901100362/07/mymodule/stats_word.py @@ -44,7 +44,7 @@ def stats_text_cn(words): def stats_text(text): - symbols = ',。:「」,?.”、!' + symbols = ',。:「」,?.”、-!' for symbol in symbols: text = text.replace(symbol,' ') l1 = text.split() From 09d52cc44ae3282d751e6402d1d3fcdc11971e55 Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Wed, 4 Sep 2019 14:44:36 +0800 Subject: [PATCH 04/11] Update stats_word.py --- exercises/1901100362/07/mymodule/stats_word.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/exercises/1901100362/07/mymodule/stats_word.py b/exercises/1901100362/07/mymodule/stats_word.py index cdecbfa7e..e82e799e4 100644 --- a/exercises/1901100362/07/mymodule/stats_word.py +++ b/exercises/1901100362/07/mymodule/stats_word.py @@ -4,20 +4,8 @@ def stats_text_en(words): - """ - elements = l_text.split() - - words = [] - symbols = ',.“-*!' - - for element in elements: - for symbol in symbols: - #replace返回新的字符串,因此必须要新的字符串 - element = element.replace(symbol,'') - #将非空字符添加到列表中 - if len(element) : - words.append(element) - """ + + #初始化一个counter字典,用来存放单词出现的频次 counter = {} From c175d886d73fae167b22c48e6f0395f0330fde43 Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Thu, 5 Sep 2019 13:50:20 +0800 Subject: [PATCH 05/11] day08 --- exercises/1901100362/08/main.py | 80 +++++++++++++++++++ .../1901100362/08/mymodule/stats_word.py | 77 ++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 exercises/1901100362/08/main.py create mode 100644 exercises/1901100362/08/mymodule/stats_word.py diff --git a/exercises/1901100362/08/main.py b/exercises/1901100362/08/main.py new file mode 100644 index 000000000..60e69b8e7 --- /dev/null +++ b/exercises/1901100362/08/main.py @@ -0,0 +1,80 @@ +from mymodule import stats_word +""" +text = ''' +愚公移⼭山 +太⾏行行,王屋⼆二⼭山的北北⾯面,住了了⼀一個九⼗十歲的⽼老老翁,名叫愚公。⼆二⼭山佔地廣闊,擋住去路路,使他 +和家⼈人往來來極為不不便便。 +⼀一天,愚公召集家⼈人說:「讓我們各盡其⼒力力,剷平⼆二⼭山,開條道路路,直通豫州,你們認為怎 +樣?」 +⼤大家都異異⼝口同聲贊成,只有他的妻⼦子表示懷疑,並說:「你連開鑿⼀一個⼩小丘的⼒力力量量都沒有,怎 +可能剷平太⾏行行、王屋⼆二⼭山呢?況且,鑿出的⼟土⽯石⼜又丟到哪裏去呢?」 +⼤大家都熱烈烈地說:「把⼟土⽯石丟進渤海海裏。」 +於是愚公就和兒孫,⼀一起開挖⼟土,把⼟土⽯石搬運到渤海海去。 +愚公的鄰居是個寡婦,有個兒⼦子⼋八歲也興致勃勃地⾛走來來幫忙。 +寒來來暑往,他們要⼀一年年才能往返渤海海⼀一次。 +住在⿈黃河河畔的智叟,看⾒見見他們這樣⾟辛苦,取笑愚公說:「你不不是很愚蠢嗎?你已⼀一把年年紀 +了了,就是⽤用盡你的氣⼒力力,也不不能挖去⼭山的⼀一⻆角呢?」 +愚公歎息道:「你有這樣的成⾒見見,是不不會明⽩白的。你⽐比那寡婦的⼩小兒⼦子還不不如呢!就算我死 +了了,還有我的兒⼦子,我的孫⼦子,我的曾孫⼦子,他們⼀一直傳下去。⽽而這⼆二⼭山是不不會加⼤大的,總有 +⼀一天,我們會把它們剷平。」 +智叟聽了了,無話可說: +⼆二⼭山的守護神被愚公的堅毅精神嚇倒,便便把此事奏知天帝。天帝佩服愚公的精神,就命兩位⼤大 +⼒力力神揹⾛走⼆二⼭山。 +How The Foolish Old Man Moved Mountains +Yugong was a ninety-year-old man who lived at the north of two high +mountains, Mount Taixing and Mount Wangwu. +Stretching over a wide expanse of land, the mountains blocked + yugong’s way making it inconvenient for him and his family to get + around. +One day yugong gathered his family together and said,”Let’s do our + best to level these two mountains. We shall open a road that leads + to Yuzhou. What do you think?” +All but his wife agreed with him. +“You don’t have the strength to cut even a small mound,” muttered + his wife. “How on earth do you suppose you can level Mount Taixin + and Mount Wanwu? Moreover, where will all the earth and rubble go?” + “Dump them into the Sea of Bohai!” said everyone. +So Yugong, his sons, and his grandsons started to break up rocks and + remove the earth. They transported the earth and rubble to the Sea + of Bohai. +Now Yugong’s neighbour was a widow who had an only child eight years + old. Evening the young boy offered his help eagerly. + Summer went by and winter came. It took Yugong and his crew a full + year to travel back and forth once. + On the bank of the Yellow River dwelled an old man much respected +for his wisdom. When he saw their back-breaking labour, he ridiculed +Yugong saying,”Aren’t you foolish, my friend? You are very old now, +and with whatever remains of your waning strength, you won’t be able +to remove even a corner of the mountain.” +Yugong uttered a sigh and said,”A biased person like you will never + understand. You can’t even compare with the widow’s little boy!” + “Even if I were dead, there will still be my children, my + grandchildren, my great grandchildren, my great great grandchildren. +They descendants will go on forever. But these mountains will not + grow any taler. We shall level them one day!” he declared with + confidence. +The wise old man was totally silenced. +When the guardian gods of the mountains saw how determined Yugong + and his crew were, they were struck with fear and reported the + incident to the Emperor of Heavens. +Filled with admiration for Yugong, the Emperor of Heavens ordered + two mighty gods to carry the mountains away. +''' +""" +text = 1 + + + +if __name__ == '__main__': + + try: + if type(text) != str: + raise ValueError + + result = stats_word.stats_text(text) + en_result = stats_word.stats_text_en(text) + cn_result = stats_word.stats_text_cn(text) + print(result) + except ValueError: + print('请输入字符串!') + diff --git a/exercises/1901100362/08/mymodule/stats_word.py b/exercises/1901100362/08/mymodule/stats_word.py new file mode 100644 index 000000000..7adf96378 --- /dev/null +++ b/exercises/1901100362/08/mymodule/stats_word.py @@ -0,0 +1,77 @@ +#统计参数中每个英文单词出现的次数 +import string + + +def stats_text_en(words): + + + + #初始化一个counter字典,用来存放单词出现的频次 + counter = {} + + #去重,减少迭代次数 + + word_set = set(words) + for word in word_set: + counter[word] = words.count(word) + + + #2.从小到大排序输出 + + return sorted(counter.items(), key = lambda x:x[1],reverse=True) + +#统计参数中每个中文字符出现的次数 +def stats_text_cn(words): + + + str_words = ''.join(words) + set_words = set(str_words) + counter = {} + + for ch in set_words: + counter[ch] = str_words.count(ch) + return sorted(counter.items(),key = lambda x:x[1],reverse=True) + + + + + +def stats_text(text): + + + symbols = ',。:「」,?.”、-!' + l1 = [] + for symbol in symbols: + text = text.replace(symbol,' ') + l1 = text.split() + + en_text = [] + cn_text = [] + for i in l1: + if len(i)>0: + if i[0] in string.ascii_letters: + en_text.append(i) + else: + cn_text.append(i) + + cn_result = stats_text_cn(cn_text) + en_result = stats_text_en(en_text) + merged_result = cn_result+en_result + return (merged_result) + + + + + + + + + + + + + + + + + From a7e9e6faa371e46b0119dcda5d758200c77e6294 Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Mon, 9 Sep 2019 15:54:28 +0800 Subject: [PATCH 06/11] update --- exercises/1901100362/08/main.py | 30 +++++++++++++------ .../1901100362/08/mymodule/stats_word.py | 15 +++++++--- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/exercises/1901100362/08/main.py b/exercises/1901100362/08/main.py index 60e69b8e7..dfd02635a 100644 --- a/exercises/1901100362/08/main.py +++ b/exercises/1901100362/08/main.py @@ -1,4 +1,7 @@ from mymodule import stats_word +import traceback +import logging + """ text = ''' 愚公移⼭山 @@ -63,18 +66,27 @@ """ text = 1 +def test_traceback(): + try: + stats_word.stats_text(text) + except Exception as e: + print("trace_back==>",e) + print(traceback.format_exc()) +def test_logger(): + try: + stats_word.stats_text(text) + except Exception as e: + logging.exception(e) if __name__ == '__main__': + test_traceback() + test_logger() - try: - if type(text) != str: - raise ValueError - result = stats_word.stats_text(text) - en_result = stats_word.stats_text_en(text) - cn_result = stats_word.stats_text_cn(text) - print(result) - except ValueError: - print('请输入字符串!') +# result = stats_word.stats_text(text) +# en_result = stats_word.stats_text_en(text) +# cn_result = stats_word.stats_text_cn(text) +# print(result) + diff --git a/exercises/1901100362/08/mymodule/stats_word.py b/exercises/1901100362/08/mymodule/stats_word.py index 7adf96378..70f43f3cd 100644 --- a/exercises/1901100362/08/mymodule/stats_word.py +++ b/exercises/1901100362/08/mymodule/stats_word.py @@ -3,7 +3,9 @@ def stats_text_en(words): - +# if type(words) != str: + if not isinstance(words,str): + raise ValueError("参数必须是字符串 %s" % type(words)) #初始化一个counter字典,用来存放单词出现的频次 @@ -22,10 +24,13 @@ def stats_text_en(words): #统计参数中每个中文字符出现的次数 def stats_text_cn(words): + + if type(words) != str: + raise ValueError("参数必须是字符串 %s" % type(text)) - str_words = ''.join(words) - set_words = set(str_words) + str_words = ''.join(words) + set_words = set(str_words) counter = {} for ch in set_words: @@ -37,7 +42,9 @@ def stats_text_cn(words): def stats_text(text): - + + if type(text) != str: + raise ValueError("请输入字符串!") symbols = ',。:「」,?.”、-!' l1 = [] From ec0f2ee97c81af865f1fe4decf8a7cd43d6bbb19 Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Wed, 11 Sep 2019 14:13:16 +0800 Subject: [PATCH 07/11] day09 --- exercises/1901100362/09/main.py | 28 ++++++++ .../1901100362/09/mymodule/stats_word.py | 72 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 exercises/1901100362/09/main.py create mode 100644 exercises/1901100362/09/mymodule/stats_word.py diff --git a/exercises/1901100362/09/main.py b/exercises/1901100362/09/main.py new file mode 100644 index 000000000..a28b4a39b --- /dev/null +++ b/exercises/1901100362/09/main.py @@ -0,0 +1,28 @@ +from mymodule import stats_word +import traceback +import logging +import json +import os + +def test_traceback(): + try: + stats_word.stats_text(text) + except Exception as e: + print("trace_back==>",e) + print(traceback.format_exc()) +def test_logger(): + try: + stats_word.stats_text(text) + except Exception as e: + logging.exception(e) + + +if __name__ == '__main__': +# test_traceback() +# test_logger() +# print(stats_word.stats_text(text)) + filename = 'tang300.json' + filedir = os.path.dirname(os.path.realpath(__file__)) + with open(filedir+'/'+filename) as f: + f1 = f.read() + print(stats_word.stats_text_cn(f1,100)) \ No newline at end of file diff --git a/exercises/1901100362/09/mymodule/stats_word.py b/exercises/1901100362/09/mymodule/stats_word.py new file mode 100644 index 000000000..f774a61c1 --- /dev/null +++ b/exercises/1901100362/09/mymodule/stats_word.py @@ -0,0 +1,72 @@ +#统计参数中每个英文单词出现的次数 +import string +from collections import Counter + + +def stats_text_en(text,count): + if not isinstance(text,str): + raise ValueError("text参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + elements = text.split() + words = [] + symbols = ',.*-!' + + for element in elements: + for symbol in symbols: + element.replace(symbol,' ') + if len(element) and element.isascii(): + words.append(element) + + + + counter = Counter(words).most_common(count) + + return sorted(dict(counter).items(), key = lambda x:x[1],reverse=True) + +#统计参数中每个中文字符出现的次数 +def stats_text_cn(text,count): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + cn_characters = [] + for character in text: + if '\u4e00'<=character<='\u9fff': + cn_characters.append(character) + + + counter = Counter(cn_characters).most_common(count) + + return sorted(dict(counter).items(),key = lambda x:x[1],reverse=True) + + + + + +def stats_text(text): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + + return stats_text_cn(text,10) + stats_text_en(text,10) + + + + + + + + + + + + + + + + + From 5c9a246866fd00263ad3ccbe106c6dd09697eaba Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Thu, 12 Sep 2019 09:13:14 +0800 Subject: [PATCH 08/11] day10 --- exercises/1901100362/10/main.py | 28 +++++++ .../1901100362/10/mymodule/stats_word.py | 79 +++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 exercises/1901100362/10/main.py create mode 100644 exercises/1901100362/10/mymodule/stats_word.py diff --git a/exercises/1901100362/10/main.py b/exercises/1901100362/10/main.py new file mode 100644 index 000000000..c94fc31f2 --- /dev/null +++ b/exercises/1901100362/10/main.py @@ -0,0 +1,28 @@ +from mymodule import stats_word +import traceback +import logging +import json +import os + +def test_traceback(): + try: + stats_word.stats_text(text) + except Exception as e: + print("trace_back==>",e) + print(traceback.format_exc()) +def test_logger(): + try: + stats_word.stats_text(text) + except Exception as e: + logging.exception(e) + + +if __name__ == '__main__': +# test_traceback() +# test_logger() +# print(stats_word.stats_text(text)) + filename = 'tang300.json' + filedir = os.path.dirname(os.path.realpath(__file__)) + with open(filedir+'/'+filename) as f: + f1 = f.read() + print(stats_word.stats_text_cn(f1,20)) \ No newline at end of file diff --git a/exercises/1901100362/10/mymodule/stats_word.py b/exercises/1901100362/10/mymodule/stats_word.py new file mode 100644 index 000000000..e405b67b2 --- /dev/null +++ b/exercises/1901100362/10/mymodule/stats_word.py @@ -0,0 +1,79 @@ +#统计参数中每个英文单词出现的次数 +#encoding = utf-8 +import string +from collections import Counter +import jieba + + +def stats_text_en(text,count): + if not isinstance(text,str): + raise ValueError("text参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + elements = text.split() + words = [] + symbols = ',.*-!' + + for element in elements: + for symbol in symbols: + element.replace(symbol,' ') + if len(element) and element.isascii(): + words.append(element) + + + + counter = Counter(words).most_common(count) + + return sorted(dict(counter).items(), key = lambda x:x[1],reverse=True) + +#统计参数中每个中文字符出现的次数 +def stats_text_cn(text,count): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + +# s1 = ' '.join(seg_list)) # 精确模式 + + cn_characters = [] + for character in text: + if '\u4e00'<=character<='\u9fff': + cn_characters.append(character) + s1 = ''.join(cn_characters) + #print(s1) + seg_list = [ element for element in jieba.cut(s1, cut_all=False) if len(element)>2] + + + counter = Counter(seg_list).most_common(count) + + return sorted(dict(counter).items(),key = lambda x:x[1],reverse=True) + + + + + +def stats_text(text): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + + return stats_text_cn(text,10) + stats_text_en(text,10) + + + + + + + + + + + + + + + + + From 58bf30c7b16640b7f1ad82562e8fd408a58bf8c9 Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Sun, 15 Sep 2019 17:25:18 +0800 Subject: [PATCH 09/11] day11 --- exercises/1901100362/11/main.py | 89 +++++++++++++++++++ .../1901100362/11/mymodule/stats_word.py | 74 +++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 exercises/1901100362/11/main.py create mode 100644 exercises/1901100362/11/mymodule/stats_word.py diff --git a/exercises/1901100362/11/main.py b/exercises/1901100362/11/main.py new file mode 100644 index 000000000..0a077f48a --- /dev/null +++ b/exercises/1901100362/11/main.py @@ -0,0 +1,89 @@ +from mymodule import stats_word +import traceback +import logging +import json +from os import path +import yagmail +from pyquery import PyQuery +import requests +import getpass + +def test_traceback(): + try: + stats_word.stats_text(text) + except Exception as e: + print("trace_back==>",e) + print(traceback.format_exc()) +def test_logger(): + try: + stats_word.stats_text(text) + except Exception as e: + logging.exception(e) + + +def load_file(): + + file_path = path.join(path.dirname(path.abspath(__file__)),'tang300.json') + print("当前文件路径==>",__file__,'\n读取文件路径==>',file_path) + ''' + tang300.json在上一级目录 + file_path = path.join(path.dirname(path.abspath(__file__)),'../tang300.json') + + tang300.json文件在上一级目录的data目录下的文件 + file_path = path.join(path.dirname(path.abspath(__file__)),'../data/tang300.json') + ''' + + with open(file_path,'r',encoding='utf-8') as f: + return f.read() + +#data为由json转换而来的字典 +def merge_poems(data): + poems = '' + for item in data: + poems +=item.get('contents','') + return poems + +def read_page(url): + response = requests.get(url) +# print(response.headers['content-type']) + document = PyQuery(response.text) + content = document('#js_content').text() + return stats_word.stats_text_cn(content,100) +def send_mail(content): + username = input('请输入邮件账号:') + passwd = input('请输入授权码:') + mail = yagmail.SMTP(user=username, + password=passwd, + host='smtp.163.com', + # smtp_ssl=True + ) #如果用的是qq邮箱或者你们公司的邮箱使用是安全协议的话,必须写上 smtp_ssl=True + mail.send( + to='pythoncamp@163.com', #如果多个收件人的话,写成list. + cc='13910255158@163.com',#抄送 + subject='张小龙',#邮件标题 + contents = content)#邮件正文 + # attachments=[r'C:\Users\Desktop\a.txt', + # r'C:\pp\b.txt'])#附件如果只有一个的话,用字符串就行,attachments=r'C:\\pp\\b.txt' + + +def main(): + try: + data = load_file() + logging.info(data[0]) + poems = merge_poems(json.loads(data)) + + logging.info('result ==> %s',stats_word.stats_text_cn(poems,100)) + print(stats_word.stats_text_cn(poems,20)) + except Exception as e: + logging.exception(e) + + +if __name__ == '__main__': +# test_traceback() +# test_logger() +# print(stats_word.stats_text(text)) +# main() + url = 'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA' + contents = json.dumps(dict(read_page(url)),ensure_ascii=False) + send_mail(contents) + diff --git a/exercises/1901100362/11/mymodule/stats_word.py b/exercises/1901100362/11/mymodule/stats_word.py new file mode 100644 index 000000000..d10158335 --- /dev/null +++ b/exercises/1901100362/11/mymodule/stats_word.py @@ -0,0 +1,74 @@ +#统计参数中每个英文单词出现的次数 +#encoding = utf-8 +import string +from collections import Counter +import jieba + + +def stats_text_en(text,count): + if not isinstance(text,str): + raise ValueError("text参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + elements = text.split() + words = [] + symbols = ',.*-!' + + for element in elements: + for symbol in symbols: + element.replace(symbol,' ') + if len(element) and element.isascii(): + words.append(element) + + return Counter(words).most_common(count) + +#统计参数中每个中文字符出现的次数 +def stats_text_cn(text,count): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + +# s1 = ' '.join(seg_list)) # 精确模式 + + cn_characters = [] + for character in text: + if '\u4e00'<=character<='\u9fff': + cn_characters.append(character) + s1 = ''.join(cn_characters) + seg_list = [ element for element in jieba.cut(s1, cut_all=False) if len(element)>2] + + return Counter(seg_list).most_common(count) + + + + + + +def stats_text(text,count): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + return stats_text_cn(text,count) + stats_text_en(text,count) + + + + + + + + + + + + + + + + + From 3cfbfcbdf2e21597afa45e9a92279d6c1b72be07 Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Fri, 20 Sep 2019 10:21:13 +0800 Subject: [PATCH 10/11] day13 --- exercises/1901100362/13/main.py | 104 ++++++++++++++++++ .../1901100362/13/mymodule/stats_word.py | 52 +++++++++ 2 files changed, 156 insertions(+) create mode 100644 exercises/1901100362/13/main.py create mode 100644 exercises/1901100362/13/mymodule/stats_word.py diff --git a/exercises/1901100362/13/main.py b/exercises/1901100362/13/main.py new file mode 100644 index 000000000..e0dcb7b59 --- /dev/null +++ b/exercises/1901100362/13/main.py @@ -0,0 +1,104 @@ +from matplotlib import rcParams +from mymodule import stats_word +import traceback +import logging +import json +from os import path +import yagmail +from pyquery import PyQuery +import requests +import matplotlib.pyplot as plt +from wxpy import * +import numpy as np +from pylab import mpl + + +def test_traceback(): + try: + stats_word.stats_text(text) + except Exception as e: + print("trace_back==>", e) + print(traceback.format_exc()) + + +def test_logger(): + try: + stats_word.stats_text(text) + except Exception as e: + logging.exception(e) + + +def load_file(): + file_path = path.join(path.dirname(path.abspath(__file__)), 'tang300.json') + print("当前文件路径==>", __file__, '\n读取文件路径==>', file_path) + ''' + tang300.json在上一级目录 + file_path = path.join(path.dirname(path.abspath(__file__)),'../tang300.json') + + tang300.json文件在上一级目录的data目录下的文件 + file_path = path.join(path.dirname(path.abspath(__file__)),'../data/tang300.json') + ''' + + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + + +# data为由json转换而来的字典 +def merge_poems(data): + poems = '' + for item in data: + poems += item.get('contents', '') + return poems + + +def get_article(): + r = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA') + document = PyQuery(r.text) + return document('#js_content').text() + + +def main(): + try: + ''' + bot = Bot() + my_friend = bot.my_friends() + + @bot.register(my_friend,SHARING) + def auto_reply(msg): + response = requests.get(msg.url) + document = PyQuery(response.text) + content = document('#js_content').text() + ''' + article = get_article() + result = stats_word.stats_text_cn(article, 10) + + np_list = np.array(result) + word_list = [] + number_list = [] + + for i in range(len(np_list)): + word_list += [np_list[i][0]] + number_list += [int(np_list[i][1])] + + plt.rcdefaults() + + plt.rcParams['font.sans-serif'] = ['Arial Unicode MS'] + # plt.rcParams['axes.unicode_minus'] = False + fig,ax = plt.subplots() + y_pos = np.arange(len(word_list)) + + ax.barh(y_pos, number_list, align='center') + ax.set_yticks(y_pos) + ax.set_yticklabels(word_list) + ax.invert_yaxis() + ax.set_xlabel('计数') + ax.set_title('词频统计') + # plt.savefig("stats.png") + plt.show() + + embed() + + except Exception as e: + logging.exception(e) +if __name__ == '__main__': + main() diff --git a/exercises/1901100362/13/mymodule/stats_word.py b/exercises/1901100362/13/mymodule/stats_word.py new file mode 100644 index 000000000..834d0533f --- /dev/null +++ b/exercises/1901100362/13/mymodule/stats_word.py @@ -0,0 +1,52 @@ +# 统计参数中每个英文单词出现的次数 +# encoding = utf-8 +import string +from collections import Counter +import jieba + + +def stats_text_en(text, count): + if not isinstance(text, str): + raise ValueError("text参数必须是字符串 %s" % type(text)) + elif not isinstance(count, int): + raise ValueError("count参数必须是int %d" % type(count)) + + elements = text.split() + words = [] + symbols = ',.*-!' + + for element in elements: + for symbol in symbols: + element.replace(symbol, ' ') + if len(element) and element.isascii(): + words.append(element) + + return Counter(words).most_common(count) + + +# 统计参数中每个中文字符出现的次数 +def stats_text_cn(text, count): + if not isinstance(text, str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count, int): + raise ValueError("count参数必须是int %d" % type(count)) + + # s1 = ' '.join(seg_list)) # 精确模式 + + cn_characters = [] + for character in text: + if '\u4e00' <= character <= '\u9fff': + cn_characters.append(character) + s1 = ''.join(cn_characters) + seg_list = [element for element in jieba.cut(s1, cut_all=False) if len(element) > 2] + + return Counter(seg_list).most_common(count) + + +def stats_text(text, count): + if not isinstance(text, str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count, int): + raise ValueError("count参数必须是int %d" % type(count)) + + return stats_text_cn(text, count) + stats_text_en(text, count) From 0a8473075b93e9cc5c38d60f9a3df45a3167d3ee Mon Sep 17 00:00:00 2001 From: tony <13910255158@163.com> Date: Fri, 20 Sep 2019 11:45:26 +0800 Subject: [PATCH 11/11] day12 --- exercises/1901100362/12/main.py | 43 +++++++++++ .../1901100362/12/mymodule/stats_word.py | 74 +++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 exercises/1901100362/12/main.py create mode 100644 exercises/1901100362/12/mymodule/stats_word.py diff --git a/exercises/1901100362/12/main.py b/exercises/1901100362/12/main.py new file mode 100644 index 000000000..72a17632d --- /dev/null +++ b/exercises/1901100362/12/main.py @@ -0,0 +1,43 @@ +import traceback +import requests +from mymodule import stats_word +from pyquery import PyQuery +from wxpy import * + + +def test_traceback(): + try: + stats_word.stats_text(text) + except Exception as e: + print("trace_back==>",e) + print(traceback.format_exc()) +def test_logger(): + try: + stats_word.stats_text(text) + except Exception as e: + logging.exception(e) + +def main(): + try: + bot = Bot() + my_friend = bot.my_friends() + + @bot.register(my_friend,SHARING) + def auto_reply(msg): + response = requests.get(msg.url) + document = PyQuery(response.text) + content = document('#js_content').text() + result = stats_word.stats_text_cn(content,10) + return result + + except Exception as e: + logging.exception(e) + + +if __name__ == '__main__': +# test_traceback() +# test_logger() +# print(stats_word.stats_text(text)) + main() + + diff --git a/exercises/1901100362/12/mymodule/stats_word.py b/exercises/1901100362/12/mymodule/stats_word.py new file mode 100644 index 000000000..728d19ff7 --- /dev/null +++ b/exercises/1901100362/12/mymodule/stats_word.py @@ -0,0 +1,74 @@ +#统计参数中每个英文单词出现的次数 +#encoding = utf-8 +import string +from collections import Counter +from jieba import cut + + +def stats_text_en(text,count): + if not isinstance(text,str): + raise ValueError("text参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + elements = text.split() + words = [] + symbols = ',.*-!' + + for element in elements: + for symbol in symbols: + element.replace(symbol,' ') + if len(element) and element.isascii(): + words.append(element) + + return Counter(words).most_common(count) + +#统计参数中每个中文字符出现的次数 +def stats_text_cn(text,count): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + +# s1 = ' '.join(seg_list)) # 精确模式 + + cn_characters = [] + for character in text: + if '\u4e00'<=character<='\u9fff': + cn_characters.append(character) + s1 = ''.join(cn_characters) + seg_list = [element for element in cut(s1, cut_all=False) if len(element) > 2] + + return Counter(seg_list).most_common(count) + + + + + + +def stats_text(text,count): + + if not isinstance(text,str): + raise ValueError("参数必须是字符串 %s" % type(text)) + elif not isinstance(count,int): + raise ValueError("count参数必须是int %d" % type(count)) + + return stats_text_cn(text,count) + stats_text_en(text,count) + + + + + + + + + + + + + + + + +