From 25539f8aed6109a5596717378a1e8a47e3a8cb0e Mon Sep 17 00:00:00 2001
From: Konaair <31443943+Konaair@users.noreply.github.com>
Date: Thu, 12 Sep 2019 15:07:18 +0200
Subject: [PATCH 1/3] Day7

---
 exercises/1901050193/1001S02E05_stats_text.py |   2 +-
 exercises/1901050193/1001S02E05_string.py     |   3 +-
 exercises/1901050193/1001S02E06_stats_word.py |  12 +-
 exercises/1901050193/README.md                |   5 +
 exercises/1901050193/d07/mymodule/main.py     |  66 ++++++++++
 .../1901050193/d07/mymodule/stats_word.py     | 119 ++++++++++++++++++
 6 files changed, 203 insertions(+), 4 deletions(-)
 create mode 100644 exercises/1901050193/d07/mymodule/main.py
 create mode 100644 exercises/1901050193/d07/mymodule/stats_word.py

diff --git a/exercises/1901050193/1001S02E05_stats_text.py b/exercises/1901050193/1001S02E05_stats_text.py
index 7bac240a9..c05d2e966 100644
--- a/exercises/1901050193/1001S02E05_stats_text.py
+++ b/exercises/1901050193/1001S02E05_stats_text.py
@@ -32,7 +32,7 @@
 counter = {} #建立空字典
 wordset = set(wordlist) #单词集合
 
-for singleword in wordlist:
+for singleword in wordset:
     counter[singleword] = wordlist.count(singleword)
 
 print('计数：',counter)
diff --git a/exercises/1901050193/1001S02E05_string.py b/exercises/1901050193/1001S02E05_string.py
index d7476dc00..4da8ee593 100644
--- a/exercises/1901050193/1001S02E05_string.py
+++ b/exercises/1901050193/1001S02E05_string.py
@@ -42,4 +42,5 @@
 
 #1.4 Sort from a to z
 print('1.4 Sort from a to z.',end='\n')
-print(sorted(swap))
\ No newline at end of file
+print(sorted(swap))
+#如果想从z到a排序 要怎么排呢 reverse=True并不行
diff --git a/exercises/1901050193/1001S02E06_stats_word.py b/exercises/1901050193/1001S02E06_stats_word.py
index d063172d8..4e2b3b05c 100644
--- a/exercises/1901050193/1001S02E06_stats_word.py
+++ b/exercises/1901050193/1001S02E06_stats_word.py
@@ -13,7 +13,7 @@ def stats_text_en(text):
     
     for singleword in wordset:
         counter[singleword] = wordlist.count(singleword)
-    return sorted(counter.items(), key=lambda x: x[1], reverse=True) #返回函数结果
+    return sorted(counter.items(), key=lambda x: x[1], reverse=True)  #返回函数结果
 
 
 #定义函数2：统计文档中中文单词出现的次数并按照频率降序排列。
@@ -108,4 +108,12 @@ def stats_text_cn(text):
     en_result = stats_text_en(en_text)  #给函数的返回结果一个值 
     cn_result = stats_text_cn(cn_text)  #中文同上
     print('英文单词按出现次数降序排列:\n', en_result)
-    print('中文单字按出现次数降序排列:\n', cn_result)
\ No newline at end of file
+    print('中文单字按出现次数降序排列:\n', cn_result)
+
+#在python中，每个模块都有一个叫__name__的内置变量，这个变量的值会根据该模块被使用的方式而变化：
+# 1、假设模块A.py 在另一个模块 B.py 中，被作为模块导入，则__name__的值为模块 A.py 的名称
+# 2、假设模块 A.py被直接执行，则__name__ 的值为__main__
+# 英文参考：https://stackoverflow.com/questions/419163/what-does-if-name-main-do
+
+#那么中文按照拼音开头排序呢
+
diff --git a/exercises/1901050193/README.md b/exercises/1901050193/README.md
index e69de29bb..97d0d3da4 100644
--- a/exercises/1901050193/README.md
+++ b/exercises/1901050193/README.md
@@ -0,0 +1,5 @@
+# 一些零碎的记录 Day05
+1. Python 中的变量不需要声明，但使用前必须先赋值
+    1. 整型变量
+    2. 浮点型变量
+    3. 字符型
diff --git a/exercises/1901050193/d07/mymodule/main.py b/exercises/1901050193/d07/mymodule/main.py
new file mode 100644
index 000000000..7ea92cb24
--- /dev/null
+++ b/exercises/1901050193/d07/mymodule/main.py
@@ -0,0 +1,66 @@
+from stats_word import stats_text
+
+sample_text = '''
+愚公移⼭山
+太⾏行行，王屋⼆二⼭山的北北⾯面，住了了⼀一個九⼗十歲的⽼老老翁，名叫愚公。⼆二⼭山佔地廣闊，擋住去路路，使他
+和家⼈人往來來極為不不便便。
+⼀一天，愚公召集家⼈人說：「讓我們各盡其⼒力力，剷平⼆二⼭山，開條道路路，直通豫州，你們認為怎
+樣？」
+⼤大家都異異⼝口同聲贊成，只有他的妻⼦子表示懷疑，並說：「你連開鑿⼀一個⼩小丘的⼒力力量量都沒有，怎
+可能剷平太⾏行行、王屋⼆二⼭山呢？況且，鑿出的⼟土⽯石⼜又丟到哪裏去呢？」
+⼤大家都熱烈烈地說：「把⼟土⽯石丟進渤海海裏。」
+於是愚公就和兒孫，⼀一起開挖⼟土，把⼟土⽯石搬運到渤海海去。
+愚公的鄰居是個寡婦，有個兒⼦子⼋八歲也興致勃勃地⾛走來來幫忙。
+寒來來暑往，他們要⼀一年年才能往返渤海海⼀一次。
+住在⿈黃河河畔的智叟，看⾒見見他們這樣⾟辛苦，取笑愚公說：「你不不是很愚蠢嗎？你已⼀一把年年紀
+了了，就是⽤用盡你的氣⼒力力，也不不能挖去⼭山的⼀一⻆角呢？」
+愚公歎息道：「你有這樣的成⾒見見，是不不會明⽩白的。你⽐比那寡婦的⼩小兒⼦子還不不如呢！就算我死
+了了，還有我的兒⼦子，我的孫⼦子，我的曾孫⼦子，他們⼀一直傳下去。⽽而這⼆二⼭山是不不會加⼤大的，總有
+⼀一天，我們會把它們剷平。」
+智叟聽了了，無話可說：
+⼆二⼭山的守護神被愚公的堅毅精神嚇倒，便便把此事奏知天帝。天帝佩服愚公的精神，就命兩位⼤大
+⼒力力神揹⾛走⼆二⼭山。
+How The Foolish Old Man Moved Mountains
+Yugong was a ninety-year-old man who lived at the north of two high
+mountains, Mount Taixing and Mount Wangwu.
+Stretching over a wide expanse of land, the mountains blocked
+yugong’s way making it inconvenient for him and his family to get
+around.
+One day yugong gathered his family together and said,”Let’s do our
+best to level these two mountains. We shall open a road that leads
+to Yuzhou. What do you think?”
+All but his wife agreed with him.
+“You don’t have the strength to cut even a small mound,” muttered
+his wife. “How on earth do you suppose you can level Mount Taixin
+and Mount Wanwu? Moreover, where will all the earth and rubble go?”
+“Dump them into the Sea of Bohai!” said everyone.
+So Yugong, his sons, and his grandsons started to break up rocks and
+remove the earth. They transported the earth and rubble to the Sea
+of Bohai.
+Now Yugong’s neighbour was a widow who had an only child eight years
+old. Evening the young boy offered his help eagerly.
+Summer went by and winter came. It took Yugong and his crew a full
+year to travel back and forth once.
+On the bank of the Yellow River dwelled an old man much respected
+for his wisdom. When he saw their back-breaking labour, he ridiculed
+Yugong saying,”Aren’t you foolish, my friend? You are very old now,
+and with whatever remains of your waning strength, you won’t be able
+to remove even a corner of the mountain.”
+Yugong uttered a sigh and said,”A biased person like you will never
+understand. You can’t even compare with the widow’s little boy!”
+“Even if I were dead, there will still be my children, my
+grandchildren, my great grandchildren, my great great grandchildren.
+They descendants will go on forever. But these mountains will not
+grow any taler. We shall level them one day!” he declared with
+confidence.
+The wise old man was totally silenced.
+When the guardian gods of the mountains saw how determined Yugong
+and his crew were, they were struck with fear and reported the
+incident to the Emperor of Heavens.
+Filled with admiration for Yugong, the Emperor of Heavens ordered
+two mighty gods to carry the mountains away.
+'''
+
+result = stats_text(sample_text)
+# Show the combined frequency list returned by stats_word.stats_text.
+print('中英统计结果=', result)
\ No newline at end of file
diff --git a/exercises/1901050193/d07/mymodule/stats_word.py b/exercises/1901050193/d07/mymodule/stats_word.py
new file mode 100644
index 000000000..97c550251
--- /dev/null
+++ b/exercises/1901050193/d07/mymodule/stats_word.py
@@ -0,0 +1,119 @@
+def stats_text_en(text):
+    """Count English words in *text*, most frequent first.
+
+    Whitespace-separated tokens have the characters ``,.*-!`` removed;
+    tokens that end up empty or contain non-ASCII characters are skipped.
+    Returns a list of ``(word, count)`` tuples in descending count order;
+    equal counts keep first-occurrence order, so the result is repeatable.
+    """
+    strip_table = str.maketrans('', '', ',.*-!')
+    counter = {}
+    for token in text.split():
+        word = token.translate(strip_table)
+        if word and word.isascii():
+            # Tally in one pass instead of list.count() per distinct word.
+            counter[word] = counter.get(word, 0) + 1
+    return sorted(counter.items(), key=lambda x: x[1], reverse=True)
+
+
+def stats_text_cn(text):
+    """Count CJK characters in *text*, most frequent first.
+
+    Only characters in U+4E00..U+9FFF are tallied, which drops punctuation,
+    Latin letters and whitespace. Returns ``(char, count)`` tuples in
+    descending count order; ties keep first-occurrence order.
+    """
+    counter = {}
+    for char in (c for c in text if '\u4e00' <= c <= '\u9fff'):
+        counter[char] = counter.get(char, 0) + 1
+    return sorted(counter.items(), key=lambda x: x[1], reverse=True)
+
+
+def stats_text(text):
+    """Return the English word counts followed by the Chinese character counts."""
+    return [*stats_text_en(text), *stats_text_cn(text)]
+
+
+en_text='''
+The Zen of Python, by Tim Peters
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambxiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do
+it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!"
+'''
+cn_text='''
+优美优于丑陋，
+
+明了优于隐晦；
+
+简单优于复杂，
+
+复杂优于凌乱，
+
+扁平优于嵌套，
+
+可读性很重要！
+
+即使实用比纯粹更优，
+
+特例亦不可违背原则。
+
+错误绝不能悄悄忽略，
+
+除非它明确需要如此。
+
+面对不确定性，
+
+拒绝妄加猜测。
+
+任何问题应有一种，
+
+且最好只有一种，
+
+显而易见的解决方法。
+
+尽管这方法一开始并非如此直观，
+
+除非你是荷兰人。
+
+做优于不做，
+
+然而不假思索还不如不做。
+
+很难解释的，必然是坏方法。
+
+很好解释的，可能是好方法。
+
+命名空间是个绝妙的主意，
+
+我们应好好利用它。 
+
+'''
+
+if __name__=='__main__':
+    # Runs only when this module is executed directly; when it is imported by
+    # another module, __name__ is the module's own name and this demo is skipped.
+    
+    en_result = stats_text_en(en_text)  # word counts for the English sample
+    cn_result = stats_text_cn(cn_text)  # character counts for the Chinese sample
+    print('英文单词按出现次数降序排列:\n', en_result)
+    print('中文单字按出现次数降序排列:\n', cn_result)
+
+
+

From 24036881010756e718ceca526742ef0cb78a5b87 Mon Sep 17 00:00:00 2001
From: Konaair <31443943+Konaair@users.noreply.github.com>
Date: Mon, 16 Sep 2019 12:02:24 +0200
Subject: [PATCH 2/3] Day09

---
 exercises/1901050193/d09/mymodule/main.py     |   9 ++
 .../1901050193/d09/mymodule/stats_word.py     | 127 ++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 exercises/1901050193/d09/mymodule/main.py
 create mode 100644 exercises/1901050193/d09/mymodule/stats_word.py

diff --git a/exercises/1901050193/d09/mymodule/main.py b/exercises/1901050193/d09/mymodule/main.py
new file mode 100644
index 000000000..50a9a454a
--- /dev/null
+++ b/exercises/1901050193/d09/mymodule/main.py
@@ -0,0 +1,9 @@
+import stats_word
+
+import os  # used to find tang300.json next to this script, not on one machine's D: drive
+with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tang300.json'), mode='r', encoding='UTF-8') as f:
+    text = f.read()
+try:
+    stats_word.stats_text(text,100)
+except ValueError:
+    print('非字符串，请重新输入')
\ No newline at end of file
diff --git a/exercises/1901050193/d09/mymodule/stats_word.py b/exercises/1901050193/d09/mymodule/stats_word.py
new file mode 100644
index 000000000..21184a76f
--- /dev/null
+++ b/exercises/1901050193/d09/mymodule/stats_word.py
@@ -0,0 +1,127 @@
+import re
+from collections import Counter
+
+def stats_text_en(text,count):
+    """Return the *count* most frequent English words in *text*.
+
+    Args:
+        text: String to analyse; whitespace-separated tokens are the words.
+        count: Maximum number of entries to return (``None`` returns all).
+
+    Returns:
+        A list of ``(word, occurrences)`` tuples, most frequent first.
+
+    Raises:
+        ValueError: If *text* is not a string.
+    """
+    if not isinstance(text, str):
+        raise ValueError('非字符串类型')  # callers catch ValueError for bad input
+
+    # Drop the punctuation characters ,.*-! in a single pass per token.
+    strip_table = str.maketrans('', '', ',.*-!')
+    cleaned = (token.translate(strip_table) for token in text.split())
+    # Tokens that are empty or contain non-ASCII characters are not words.
+    words = [word for word in cleaned if word and word.isascii()]
+    return Counter(words).most_common(count)  # already sorted by frequency
+
+
+
+
+def stats_text_cn(text,count):
+    """Return the *count* most frequent Chinese characters in *text*.
+
+    Only characters in U+4E00..U+9FFF are counted, so punctuation, Latin
+    letters and whitespace are ignored. ``count=None`` returns every entry.
+
+    Returns:
+        A list of ``(character, occurrences)`` tuples, most frequent first.
+
+    Raises:
+        ValueError: If *text* is not a string.
+    """
+    if not isinstance(text, str):
+        raise ValueError('非字符串类型')
+    hanzi = [char for char in text if '\u4e00' <= char <= '\u9fff']
+    return Counter(hanzi).most_common(count)  # already sorted by frequency
+
+def stats_text(text,count):
+    """Print the top *count* Chinese characters, then English words, of *text*."""
+    # Validate here too so bad input fails before anything is printed.
+    if not isinstance(text, str):
+        raise ValueError('非字符串类型')
+    # Results are printed, not returned; the function returns None.
+    print("文本中的中文汉字词频为：\n", stats_text_cn(text, count))
+    print("文本中的英文单词词频为：\n", stats_text_en(text, count))
+
+
+en_text='''
+The Zen of Python, by Tim Peters
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambxiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do
+it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!"
+'''
+cn_text='''
+优美优于丑陋，
+
+明了优于隐晦；
+
+简单优于复杂，
+
+复杂优于凌乱，
+
+扁平优于嵌套，
+
+可读性很重要！
+
+即使实用比纯粹更优，
+
+特例亦不可违背原则。
+
+错误绝不能悄悄忽略，
+
+除非它明确需要如此。
+
+面对不确定性，
+
+拒绝妄加猜测。
+
+任何问题应有一种，
+
+且最好只有一种，
+
+显而易见的解决方法。
+
+尽管这方法一开始并非如此直观，
+
+除非你是荷兰人。
+
+做优于不做，
+
+然而不假思索还不如不做。
+
+很难解释的，必然是坏方法。
+
+很好解释的，可能是好方法。
+
+命名空间是个绝妙的主意，
+
+我们应好好利用它。 
+
+'''

From ce9a07ff80c3841987bb71908438739d808c91f2 Mon Sep 17 00:00:00 2001
From: Konaair <31443943+Konaair@users.noreply.github.com>
Date: Tue, 17 Sep 2019 20:35:43 +0200
Subject: [PATCH 3/3] Day 10

---
 exercises/1901050193/d10/mymodule/main.py     |  11 ++
 .../1901050193/d10/mymodule/stats_word.py     | 102 ++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 exercises/1901050193/d10/mymodule/main.py
 create mode 100644 exercises/1901050193/d10/mymodule/stats_word.py

diff --git a/exercises/1901050193/d10/mymodule/main.py b/exercises/1901050193/d10/mymodule/main.py
new file mode 100644
index 000000000..baae2c164
--- /dev/null
+++ b/exercises/1901050193/d10/mymodule/main.py
@@ -0,0 +1,11 @@
+import stats_word
+import os
+
+# Locate d09's tang300.json relative to this script, not via a machine-specific path.
+json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir, 'd09', 'mymodule', 'tang300.json')
+with open(json_path, mode='r', encoding='UTF-8') as f:
+    text = f.read()  # no size argument, so the whole file is read
+try:
+    print("唐诗三百首中的词频前 20 的词和词频数:\n",stats_word.stats_text_cn(text,20))
+except ValueError:
+    print('非字符串，请重新输入')
\ No newline at end of file
diff --git a/exercises/1901050193/d10/mymodule/stats_word.py b/exercises/1901050193/d10/mymodule/stats_word.py
new file mode 100644
index 000000000..99a26866a
--- /dev/null
+++ b/exercises/1901050193/d10/mymodule/stats_word.py
@@ -0,0 +1,102 @@
+from collections import Counter
+import jieba
+import re
+
+
+def stats_text_cn(text,count):
+    """Return the *count* most frequent Chinese words (2+ characters) in *text*.
+
+    Args:
+        text: String to analyse; only U+4E00..U+9FA5 characters are kept.
+        count: Maximum number of entries to return (``None`` returns all).
+
+    Returns:
+        A list of ``(word, occurrences)`` tuples, most frequent first.
+
+    Raises:
+        ValueError: If *text* is not a string.
+    """
+    if not isinstance(text, str):
+        raise ValueError('非字符串类型')
+    # Replace each non-Chinese run with one space rather than deleting it, so
+    # characters on either side of punctuation/newlines are never fused into a word.
+    text = re.sub('[^\u4e00-\u9fa5]+', ' ', text)
+    # Accurate-mode segmentation; every single-character token is dropped below.
+    words = [word for word in jieba.cut(text, cut_all=False) if len(word) >= 2]
+    return Counter(words).most_common(count)  # already sorted by frequency
+
+
+text='''
+The Zen of Python, by Tim Peters
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambxiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do
+it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!"
+
+优美优于丑陋，
+
+明了优于隐晦；
+
+简单优于复杂，
+
+复杂优于凌乱，
+
+扁平优于嵌套，
+
+可读性很重要！
+
+即使实用比纯粹更优，
+
+特例亦不可违背原则。
+
+错误绝不能悄悄忽略，
+
+除非它明确需要如此。
+
+面对不确定性，
+
+拒绝妄加猜测。
+
+任何问题应有一种，
+
+且最好只有一种，
+
+显而易见的解决方法。
+
+尽管这方法一开始并非如此直观，
+
+除非你是荷兰人。
+
+做优于不做，
+
+然而不假思索还不如不做。
+
+很难解释的，必然是坏方法。
+
+很好解释的，可能是好方法。
+
+命名空间是个绝妙的主意，
+
+我们应好好利用它。 
+
+'''
+
+
+if __name__=='__main__':  # demo: runs only when this file is executed directly
+    print('中文词语按出现次数降序排列:\n', stats_text_cn(text,20))
\ No newline at end of file