def stats_text_en(text):
    """Count how often each English word occurs in *text*.

    Every ASCII punctuation mark except the apostrophe is treated as a word
    separator, the text is lower-cased, and the remaining
    whitespace-separated tokens are counted.  The apostrophe is kept so that
    contractions such as "it's" stay a single word.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.
        Words with equal counts keep the order of their first appearance.
    """
    # Imported locally because this script has no module-level import block.
    import string
    from collections import Counter

    # The hand-written separator list used previously lacked "-", so runs
    # such as "---" were counted as words.  string.punctuation is a superset
    # of that list.
    separators = str.maketrans(
        {mark: " " for mark in string.punctuation if mark != "'"}
    )
    words = text.translate(separators).lower().split()

    # Counter tallies in a single pass (instead of list.count per word);
    # most_common() sorts by count, descending, and is stable for ties.
    return Counter(words).most_common()
def stats_text_cn(text):
    """Count how often each Chinese character occurs in *text*.

    A character is counted when its code point lies in the range
    U+4E00..U+9FFF (a plain range comparison, not a regular expression);
    every other character is ignored.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(character, count)`` tuples sorted by descending count.
        Characters with equal counts keep the order of their first
        appearance.
    """
    # Imported locally because this script has no module-level import block.
    from collections import Counter

    hanzi = (ch for ch in text if "\u4e00" <= ch <= "\u9fff")

    # One pass over the text instead of calling text.count() per character.
    return Counter(hanzi).most_common()
# Driver script: hands the module-level sample ``text`` to
# ``stats_word.stats_text``.
from mymodule import stats_word

# Guard the entry call so importing this file does not trigger the run.
if __name__ == "__main__":
    stats_word.stats_text(text)
import collections
import re

# Sample (deliberately non-string) input used by the module-level demo call.
text = [2, 3, 4, 5]

# Compiled once at import time instead of on every call.
_CN_CHAR = re.compile('[\u4e00-\u9fa5]')
_EN_WORD = re.compile(r'[A-Za-z]+')


def _require_str(text):
    """Raise ValueError unless *text* is a string."""
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")


def stats_text_cn(text):
    """Print and return the frequency of each Chinese character in *text*.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each character in the range
        U+4E00..U+9FA5 to its number of occurrences.

    Raises:
        ValueError: If *text* is not a string.
    """
    _require_str(text)
    # findall() already yields single characters, so they can be counted
    # directly without joining them back into a string first.
    counts = collections.Counter(_CN_CHAR.findall(text))
    print(counts)
    return counts


def stats_text_en(text):
    """Print and return the frequency of each English word in *text*.

    A word is a maximal run of ASCII letters; case is preserved, and any
    other character (including an apostrophe) separates words.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences.

    Raises:
        ValueError: If *text* is not a string.
    """
    _require_str(text)
    counts = collections.Counter(_EN_WORD.findall(text))
    print(counts)
    return counts


def stats_text(text):
    """Print the Chinese character and English word frequencies of *text*.

    Args:
        text: The string to analyse.

    Returns:
        A ``(chinese_counts, english_counts)`` tuple of
        ``collections.Counter`` objects.

    Raises:
        ValueError: If *text* is not a string.
    """
    _require_str(text)
    return stats_text_cn(text), stats_text_en(text)
# Run the demo call only when this file is executed as a script.  Left
# unguarded it also runs on import, and because the module-level sample
# ``text`` is a list, stats_text() raises ValueError and the import itself
# fails.
if __name__ == "__main__":
    stats_text(text)