From 25539f8aed6109a5596717378a1e8a47e3a8cb0e Mon Sep 17 00:00:00 2001 From: Konaair <31443943+Konaair@users.noreply.github.com> Date: Thu, 12 Sep 2019 15:07:18 +0200 Subject: [PATCH 1/3] Day7 --- exercises/1901050193/1001S02E05_stats_text.py | 2 +- exercises/1901050193/1001S02E05_string.py | 3 +- exercises/1901050193/1001S02E06_stats_word.py | 12 +- exercises/1901050193/README.md | 5 + exercises/1901050193/d07/mymodule/main.py | 66 ++++++++++ .../1901050193/d07/mymodule/stats_word.py | 119 ++++++++++++++++++ 6 files changed, 203 insertions(+), 4 deletions(-) create mode 100644 exercises/1901050193/d07/mymodule/main.py create mode 100644 exercises/1901050193/d07/mymodule/stats_word.py diff --git a/exercises/1901050193/1001S02E05_stats_text.py b/exercises/1901050193/1001S02E05_stats_text.py index 7bac240a9..c05d2e966 100644 --- a/exercises/1901050193/1001S02E05_stats_text.py +++ b/exercises/1901050193/1001S02E05_stats_text.py @@ -32,7 +32,7 @@ counter = {} #建立空字典 wordset = set(wordlist) #单词集合 -for singleword in wordlist: +for singleword in wordset: counter[singleword] = wordlist.count(singleword) print('计数:',counter) diff --git a/exercises/1901050193/1001S02E05_string.py b/exercises/1901050193/1001S02E05_string.py index d7476dc00..4da8ee593 100644 --- a/exercises/1901050193/1001S02E05_string.py +++ b/exercises/1901050193/1001S02E05_string.py @@ -42,4 +42,5 @@ #1.4 Sort from a to z print('1.4 Sort from a to z.',end='\n') -print(sorted(swap)) \ No newline at end of file +print(sorted(swap)) +#如果想从z到a排序 要怎么排呢 reverse=True并不行 diff --git a/exercises/1901050193/1001S02E06_stats_word.py b/exercises/1901050193/1001S02E06_stats_word.py index d063172d8..4e2b3b05c 100644 --- a/exercises/1901050193/1001S02E06_stats_word.py +++ b/exercises/1901050193/1001S02E06_stats_word.py @@ -13,7 +13,7 @@ def stats_text_en(text): for singleword in wordset: counter[singleword] = wordlist.count(singleword) - return sorted(counter.items(), key=lambda x: x[1], reverse=True) #返回函数结果 + return sorted(counter.items(), key=lambda x: x[1], reverse=True) #返回函数结果 #定义函数2:统计文档中中文单词出现的次数并按照频率降序排列。 @@ -108,4 +108,12 @@ def stats_text_cn(text): en_result = stats_text_en(en_text) #给函数的返回结果一个值 cn_result = stats_text_cn(cn_text) #中文同上 print('英文单词按出现次数降序排列:\n', en_result) - print('中文单字按出现次数降序排列:\n', cn_result) \ No newline at end of file + print('中文单字按出现次数降序排列:\n', cn_result) + +#在python中,每个模块都有一个叫_name_的内置变量,这个变量的值会根据该模块被使用的方式而变化: +# 1、假设模块A.py 在另一个模块 B.py 中,被作为模块导入,则_name_的值为模块 A.py 的名称 +# 2、假设模块 A.py被直接执行,则_name_ 的值为_main_ +# 英文参考:https://stackoverflow.com/questions/419163/what-does-if-name-main-do + +#那么中文按照拼音开头排序呢 + diff --git a/exercises/1901050193/README.md b/exercises/1901050193/README.md index e69de29bb..97d0d3da4 100644 --- a/exercises/1901050193/README.md +++ b/exercises/1901050193/README.md @@ -0,0 +1,5 @@ +#一些零碎的记录Day05 +1.python中的变量不需要声明,但使用时必须赋值 + 1.整形变量 + 2.浮点型变量 + 3.字符型 diff --git a/exercises/1901050193/d07/mymodule/main.py b/exercises/1901050193/d07/mymodule/main.py new file mode 100644 index 000000000..7ea92cb24 --- /dev/null +++ b/exercises/1901050193/d07/mymodule/main.py @@ -0,0 +1,66 @@ +from stats_word import stats_text + +sample_text = ''' +愚公移⼭山 +太⾏行行,王屋⼆二⼭山的北北⾯面,住了了⼀一個九⼗十歲的⽼老老翁,名叫愚公。⼆二⼭山佔地廣闊,擋住去路路,使他 +和家⼈人往來來極為不不便便。 +⼀一天,愚公召集家⼈人說:「讓我們各盡其⼒力力,剷平⼆二⼭山,開條道路路,直通豫州,你們認為怎 +樣?」 +⼤大家都異異⼝口同聲贊成,只有他的妻⼦子表示懷疑,並說:「你連開鑿⼀一個⼩小丘的⼒力力量量都沒有,怎 +可能剷平太⾏行行、王屋⼆二⼭山呢?況且,鑿出的⼟土⽯石⼜又丟到哪裏去呢?」 +⼤大家都熱烈烈地說:「把⼟土⽯石丟進渤海海裏。」 +於是愚公就和兒孫,⼀一起開挖⼟土,把⼟土⽯石搬運到渤海海去。 +愚公的鄰居是個寡婦,有個兒⼦子⼋八歲也興致勃勃地⾛走來來幫忙。 +寒來來暑往,他們要⼀一年年才能往返渤海海⼀一次。 +住在⿈黃河河畔的智叟,看⾒見見他們這樣⾟辛苦,取笑愚公說:「你不不是很愚蠢嗎?你已⼀一把年年紀 +了了,就是⽤用盡你的氣⼒力力,也不不能挖去⼭山的⼀一⻆角呢?」 +愚公歎息道:「你有這樣的成⾒見見,是不不會明⽩白的。你⽐比那寡婦的⼩小兒⼦子還不不如呢!就算我死 +了了,還有我的兒⼦子,我的孫⼦子,我的曾孫⼦子,他們⼀一直傳下去。⽽而這⼆二⼭山是不不會加⼤大的,總有 +⼀一天,我們會把它們剷平。」 +智叟聽了了,無話可說: +⼆二⼭山的守護神被愚公的堅毅精神嚇倒,便便把此事奏知天帝。天帝佩服愚公的精神,就命兩位⼤大 +⼒力力神揹⾛走⼆二⼭山。 +How The Foolish Old Man Moved Mountains +Yugong was a ninety-year-old man who lived at the north of two high +mountains, Mount Taixing and Mount Wangwu. +Stretching over a wide expanse of land, the mountains blocked +yugong’s way making it inconvenient for him and his family to get +around. +One day yugong gathered his family together and said,”Let’s do our +best to level these two mountains. We shall open a road that leads +to Yuzhou. What do you think?” +All but his wife agreed with him. +“You don’t have the strength to cut even a small mound,” muttered +his wife. “How on earth do you suppose you can level Mount Taixin +and Mount Wanwu? Moreover, where will all the earth and rubble go?” +“Dump them into the Sea of Bohai!” said everyone. +So Yugong, his sons, and his grandsons started to break up rocks and +remove the earth. They transported the earth and rubble to the Sea +of Bohai. +Now Yugong’s neighbour was a widow who had an only child eight years +old. Evening the young boy offered his help eagerly. +Summer went by and winter came. It took Yugong and his crew a full +year to travel back and forth once. +On the bank of the Yellow River dwelled an old man much respected +for his wisdom. When he saw their back-breaking labour, he ridiculed +Yugong saying,”Aren’t you foolish, my friend? You are very old now, +and with whatever remains of your waning strength, you won’t be able +to remove even a corner of the mountain.” +Yugong uttered a sigh and said,”A biased person like you will never +understand. You can’t even compare with the widow’s little boy!” +“Even if I were dead, there will still be my children, my +grandchildren, my great grandchildren, my great great grandchildren. +They descendants will go on forever. But these mountains will not +grow any taler. We shall level them one day!” he declared with +confidence. +The wise old man was totally silenced. +When the guardian gods of the mountains saw how determined Yugong +and his crew were, they were struck with fear and reported the +incident to the Emperor of Heavens. +Filled with admiration for Yugong, the Emperor of Heavens ordered +two mighty gods to carry the mountains away. +''' + +result = stats_text(sample_text) + +print('中英统计结果=', result) \ No newline at end of file diff --git a/exercises/1901050193/d07/mymodule/stats_word.py b/exercises/1901050193/d07/mymodule/stats_word.py new file mode 100644 index 000000000..97c550251 --- /dev/null +++ b/exercises/1901050193/d07/mymodule/stats_word.py @@ -0,0 +1,119 @@ +#定义函数1:统计参数中每个英⽂文单词出现的次数,最后返回⼀个按词频降序排列列的数组。 +def stats_text_en(text): + words = text.split() + wordlist = [] + symbols = ',.*-!' + for word in words: + for symbol in symbols: + word = word.replace(symbol,'') + if len(word) and word.isascii(): + wordlist.append(word) + counter = {} + wordset = set(wordlist) + + for singleword in wordset: + counter[singleword] = wordlist.count(singleword) + return sorted(counter.items(), key=lambda x: x[1], reverse=True) #返回函数结果 + + +#定义函数2:统计文档中中文单词出现的次数并按照频率降序排列。 +def stats_text_cn(text): + cnsymbols=[] + for cnsymbol in text: + if '\u4e00'<= cnsymbol<='\u9fff': #判断是否属于中文单字,可以直接过滤掉符号等等了。 + cnsymbols.append(cnsymbol) + counter={} + cnsymbols_set=set(cnsymbols) + for cnsymbol in cnsymbols_set: + counter[cnsymbol]=cnsymbols.count(cnsymbol) + return sorted(counter.items(), key = lambda x: x[1], reverse=True) #同上返回函数结果 + + +def stats_text(text): +#合并 英文词频 和中文词频 的结果 + return stats_text_en(text) + stats_text_cn(text) + + +en_text=''' +The Zen of Python, by Tim Peters +Beautiful is better than ugly. +Explicit is better than implicit. +Simple is better than complex. +Complex is better than complicated. +Flat is better than nested. +Sparse is better than dense. +Readability counts. +Special cases aren't special enough to break the rules. +Although practicality beats purity. +Errors should never pass silently. +Unless explicitly silenced. +In the face of ambxiguity, refuse the temptation to guess. +There should be one-- and preferably only one --obvious way to do +it. +Although that way may not be obvious at first unless you're Dutch. +Now is better than never. +Although never is often better than *right* now. +If the implementation is hard to explain, it's a bad idea. +If the implementation is easy to explain, it may be a good idea. +Namespaces are one honking great idea -- let's do more of those!" +''' +cn_text=''' +优美优于丑陋, + +明了优于隐晦; + +简单优于复杂, + +复杂优于凌乱, + +扁平优于嵌套, + +可读性很重要! + +即使实用比纯粹更优, + +特例亦不可违背原则。 + +错误绝不能悄悄忽略, + +除非它明确需要如此。 + +面对不确定性, + +拒绝妄加猜测。 + +任何问题应有一种, + +且最好只有一种, + +显而易见的解决方法。 + +尽管这方法一开始并非如此直观, + +除非你是荷兰人。 + +做优于不做, + +然而不假思索还不如不做。 + +很难解释的,必然是坏方法。 + +很好解释的,可能是好方法。 + +命名空间是个绝妙的主意, + +我们应好好利用它。 + +''' + +if __name__=='__main__': + # if _name_=='_main_':"的作用在于:如果直接执行含有该语句的模块,则执行该语句后续部分; + # 若在另一个模块中调用含有该语句的模块时,该语句的后续部分不执行。 + + en_result = stats_text_en(en_text) #给函数的返回结果一个值 + cn_result = stats_text_cn(cn_text) #中文同上 + print('英文单词按出现次数降序排列:\n', en_result) + print('中文单字按出现次数降序排列:\n', cn_result) + + + From 24036881010756e718ceca526742ef0cb78a5b87 Mon Sep 17 00:00:00 2001 From: Konaair <31443943+Konaair@users.noreply.github.com> Date: Mon, 16 Sep 2019 12:02:24 +0200 Subject: [PATCH 2/3] Day09 --- exercises/1901050193/d09/mymodule/main.py | 9 ++ .../1901050193/d09/mymodule/stats_word.py | 127 ++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 exercises/1901050193/d09/mymodule/main.py create mode 100644 exercises/1901050193/d09/mymodule/stats_word.py diff --git a/exercises/1901050193/d09/mymodule/main.py b/exercises/1901050193/d09/mymodule/main.py new file mode 100644 index 000000000..50a9a454a --- /dev/null +++ b/exercises/1901050193/d09/mymodule/main.py @@ -0,0 +1,9 @@ +import stats_word + +with open('D:\\文档\\Pythonlernen-ss01\\selfteaching-python-camp\\exercises\\1901050193\\d09\\mymodule\\tang300.json',mode='r', encoding='UTF-8') as f: + text = f.read() + +try: + stats_word.stats_text(text,100) +except ValueError: + print('非字符串,请重新输入') \ No newline at end of file diff --git a/exercises/1901050193/d09/mymodule/stats_word.py b/exercises/1901050193/d09/mymodule/stats_word.py new file mode 100644 index 000000000..21184a76f --- /dev/null +++ b/exercises/1901050193/d09/mymodule/stats_word.py @@ -0,0 +1,127 @@ +import re +from collections import Counter + +#定义函数1:统计参数中每个英⽂文单词出现的次数,最后返回⼀个按词频降序排列列的数组。 +def stats_text_en(text,count): + if type(text)!= str: + raise ValueError('非字符串类型') #遇到非字符串类型时,抛出错误 + else: + pass + + + words = text.split() + wordlist = [] + symbols = ',.*-!' #过滤符号 + + + for word in words: + for symbol in symbols: + word = word.replace(symbol,'') + if len(word) and word.isascii(): + wordlist.append(word) + + + counter = Counter(wordlist).most_common(count) #使⽤用标准库中的 Counter 来完善统计功能 + return sorted(dict(counter).items(), key = lambda x:x[1],reverse = True) + + + + +#定义函数2:统计文档中中文单词出现的次数并按照频率降序排列。 +def stats_text_cn(text,count): + if type(text)!= str: + raise ValueError('非字符串类型') + else: + pass + + + cnsymbols=[] + for cnsymbol in text: + if '\u4e00'<= cnsymbol<='\u9fff': #判断是否属于中文单字,可以直接过滤掉符号等等了。 + cnsymbols.append(cnsymbol) + + + counter = Counter(cnsymbols).most_common(count) #使⽤用标准库中的 Counter 来完善统计功能 + return sorted(dict(counter).items(), key = lambda x:x[1],reverse = True) + +def stats_text(text,count): +#合并 英文词频 和中文词频 的结果 + if type(text)!= str: + raise ValueError('非字符串类型') + else: + pass + print("文本中的中文汉字词频为:\n", stats_text_cn(text, count)) + print("文本中的英文单词词频为:\n", stats_text_en(text, count)) + + +en_text=''' +The Zen of Python, by Tim Peters +Beautiful is better than ugly. +Explicit is better than implicit. +Simple is better than complex. +Complex is better than complicated. +Flat is better than nested. +Sparse is better than dense. +Readability counts. +Special cases aren't special enough to break the rules. +Although practicality beats purity. +Errors should never pass silently. +Unless explicitly silenced. +In the face of ambxiguity, refuse the temptation to guess. +There should be one-- and preferably only one --obvious way to do +it. +Although that way may not be obvious at first unless you're Dutch. +Now is better than never. +Although never is often better than *right* now. +If the implementation is hard to explain, it's a bad idea. +If the implementation is easy to explain, it may be a good idea. +Namespaces are one honking great idea -- let's do more of those!" +''' +cn_text=''' +优美优于丑陋, + +明了优于隐晦; + +简单优于复杂, + +复杂优于凌乱, + +扁平优于嵌套, + +可读性很重要! + +即使实用比纯粹更优, + +特例亦不可违背原则。 + +错误绝不能悄悄忽略, + +除非它明确需要如此。 + +面对不确定性, + +拒绝妄加猜测。 + +任何问题应有一种, + +且最好只有一种, + +显而易见的解决方法。 + +尽管这方法一开始并非如此直观, + +除非你是荷兰人。 + +做优于不做, + +然而不假思索还不如不做。 + +很难解释的,必然是坏方法。 + +很好解释的,可能是好方法。 + +命名空间是个绝妙的主意, + +我们应好好利用它。 + +''' From ce9a07ff80c3841987bb71908438739d808c91f2 Mon Sep 17 00:00:00 2001 From: Konaair <31443943+Konaair@users.noreply.github.com> Date: Tue, 17 Sep 2019 20:35:43 +0200 Subject: [PATCH 3/3] Day 10 --- exercises/1901050193/d10/mymodule/main.py | 11 ++ .../1901050193/d10/mymodule/stats_word.py | 102 ++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 exercises/1901050193/d10/mymodule/main.py create mode 100644 exercises/1901050193/d10/mymodule/stats_word.py diff --git a/exercises/1901050193/d10/mymodule/main.py b/exercises/1901050193/d10/mymodule/main.py new file mode 100644 index 000000000..baae2c164 --- /dev/null +++ b/exercises/1901050193/d10/mymodule/main.py @@ -0,0 +1,11 @@ +import stats_word +import os + +with open('D:\\文档\\Pythonlernen-ss01\\selfteaching-python-camp\\exercises\\1901050193\\d09\\mymodule\\tang300.json',mode='r', encoding='UTF-8') as f: + text = f.read() + #从文件读取指定的字节数,如果未给定或为负则读取所有。 + +try: + print("唐诗三百首中的词频前 20 的词和词频数:\n",stats_word.stats_text_cn(text,20)) +except ValueError: + print('非字符串,请重新输入') \ No newline at end of file diff --git a/exercises/1901050193/d10/mymodule/stats_word.py b/exercises/1901050193/d10/mymodule/stats_word.py new file mode 100644 index 000000000..99a26866a --- /dev/null +++ b/exercises/1901050193/d10/mymodule/stats_word.py @@ -0,0 +1,102 @@ +from collections import Counter +import jieba +import re + + +#定义函数stats_text_cn 用于统计文档中中文单词出现的次数并按照频率降序排列。 +def stats_text_cn(text,count): + if type(text)!= str: + raise ValueError('非字符串类型') + else: + pass + #提前处理掉所有非中文部分 + #[^\u4e00-\u9fa5]表示所有非中文 + text = re.sub('[^\u4e00-\u9fa5]','',text) + + #jieba 用精确模式分词 + seg_list = jieba.cut(text, cut_all=False) + + #只统计长度大2的词 + seg_dic = [] + for word in seg_list: + if len(word)>=2: + seg_dic.append(word) + + counter = Counter(seg_dic).most_common(count) #使⽤用标准库中的 Counter 来完善统计功能 + return sorted(dict(counter).items(), key = lambda x:x[1],reverse = True) + + +text=''' +The Zen of Python, by Tim Peters +Beautiful is better than ugly. +Explicit is better than implicit. +Simple is better than complex. +Complex is better than complicated. +Flat is better than nested. +Sparse is better than dense. +Readability counts. +Special cases aren't special enough to break the rules. +Although practicality beats purity. +Errors should never pass silently. +Unless explicitly silenced. +In the face of ambxiguity, refuse the temptation to guess. +There should be one-- and preferably only one --obvious way to do +it. +Although that way may not be obvious at first unless you're Dutch. +Now is better than never. +Although never is often better than *right* now. +If the implementation is hard to explain, it's a bad idea. +If the implementation is easy to explain, it may be a good idea. +Namespaces are one honking great idea -- let's do more of those!" + +优美优于丑陋, + +明了优于隐晦; + +简单优于复杂, + +复杂优于凌乱, + +扁平优于嵌套, + +可读性很重要! + +即使实用比纯粹更优, + +特例亦不可违背原则。 + +错误绝不能悄悄忽略, + +除非它明确需要如此。 + +面对不确定性, + +拒绝妄加猜测。 + +任何问题应有一种, + +且最好只有一种, + +显而易见的解决方法。 + +尽管这方法一开始并非如此直观, + +除非你是荷兰人。 + +做优于不做, + +然而不假思索还不如不做。 + +很难解释的,必然是坏方法。 + +很好解释的,可能是好方法。 + +命名空间是个绝妙的主意, + +我们应好好利用它。 + +''' + + +if __name__=='__main__': + print('中文单字按出现次数降序排列:\n', stats_text_cn(text,20)) \ No newline at end of file