Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions exercises/1901100229/1001S02E06_stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# 1. Function that counts English word frequencies
en_text = '''
The Zen of Python , by Tim Peters

Beautiful is better than ugly.
Explicit is better than implcit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases are't special enough to break the rules.
Although practicality beats purity.
Unless explicitly silenced.
In the face of ambxiguity, refuse the temptation to guess.
There should be one --- and preferably only one -- obvious way to do it.
Although the way may not be obvious at first unless you are a Dutch.
Now is better than never.
Although never is better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it's maybe a good idea.
Namespaces are one honking great idea --- let's do more of those!
'''

def stats_text_en(text):
    """Count how often each English word occurs in *text*.

    Every character listed in ``symbols`` is treated as a word separator and
    the text is lower-cased, so counting is case-insensitive. Tokens that
    contain no letter or digit at all (for example a bare ``---``) are
    ignored instead of being reported as words.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count. Words
        with equal counts keep the order in which they first appear.
    """
    symbols = '~!@#$%^&*(),.[]{}|?/;<>'
    # A single translate() pass replaces every symbol with a space, instead of
    # rescanning the whole text once per symbol with str.replace().
    cleaned = text.translate(str.maketrans(symbols, ' ' * len(symbols)))

    counts = {}
    for word in cleaned.lower().split():
        # Skip punctuation-only tokens such as "--" that survive the split.
        if not any(ch.isalnum() for ch in word):
            continue
        # Increment in place: linear overall, unlike list.count() per word.
        counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so ties stay in first-appearance order.
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


# Run the English word counter on the sample text and print the sorted result.
print(stats_text_en(en_text))


# 2. Function that counts Chinese character frequencies
cn_text = '''
美丽胜过丑陋。
显式优于隐式。
简单比复杂更好。
复杂比复杂更好。
优于嵌套。
稀疏优于密集。
可读性很重要。
特殊情况不足以打破规则。
虽然实用性胜过纯洁。
错误不应该默默地传递。
除非明确沉默。
面对困惑,拒绝猜测的诱惑。
应该有一个 - 最好只有一个 - 明显的方法来做到这一点。
虽然这种方式起初可能并不明显,除非你是荷兰人。
现在比永远好。
虽然现在永远不会比*正确好。
如果实施很难解释,这是一个坏主意。
如果实现很容易解释,那可能是个好主意。
命名空间是一个很棒的主意 - 让我们做更多的事情吧!
'''

def stats_text_cn(text):
    """Count how often each Chinese character occurs in *text*.

    A character is counted when its code point lies in the range
    U+4E00..U+9FFF; every other character (ASCII letters, punctuation,
    whitespace, ...) is ignored.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(character, count)`` tuples ordered by descending count.
        Characters with equal counts keep the order in which they first
        appear.
    """
    counts = {}
    for ch in text:
        # Plain code-point range comparison (no regular expression involved).
        if '\u4e00' <= ch <= '\u9fff':
            # Increment in place so the text is scanned once, rather than
            # calling text.count() again for every character.
            counts[ch] = counts.get(ch, 0) + 1
    # sorted() is stable, so ties stay in first-appearance order.
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)

print(stats_text_cn(cn_text))  # Call the function and print the result


45 changes: 45 additions & 0 deletions exercises/1901100229/d07/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
text = '''
The Zen of Python , by Tim Peters

Beautiful is better than ugly.
Explicit is better than implcit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases are't special enough to break the rules.
Although practicality beats purity.
Unless explicitly silenced.
In the face of ambxiguity, refuse the temptation to guess.
There should be one --- and preferably only one -- obvious way to do it.
Although the way may not be obvious at first unless you are a Dutch.
Now is better than never.
Although never is better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it's maybe a good idea.
Namespaces are one honking great idea --- let's do more of those!
美丽胜过丑陋。
显式优于隐式。
简单比复杂更好。
复杂比复杂更好。
优于嵌套。
稀疏优于密集。
可读性很重要。
特殊情况不足以打破规则。
虽然实用性胜过纯洁。
错误不应该默默地传递。
除非明确沉默。
面对困惑,拒绝猜测的诱惑。
应该有一个 - 最好只有一个 - 明显的方法来做到这一点。
虽然这种方式起初可能并不明显,除非你是荷兰人。
现在比永远好。
虽然现在永远不会比*正确好。
如果实施很难解释,这是一个坏主意。
如果实现很容易解释,那可能是个好主意。
命名空间是一个很棒的主意 - 让我们做更多的事情吧!
'''


# Load the statistics helpers from the local ``mymodule`` package.
from mymodule import stats_word
# Run the combined Chinese/English statistics on the sample text defined above.
stats_word.stats_text(text)
59 changes: 59 additions & 0 deletions exercises/1901100229/d07/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
text = '''
The Zen of Python , by Tim Peters

Beautiful is better than ugly.
Explicit is better than implcit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases are't special enough to break the rules.
Although practicality beats purity.
Unless explicitly silenced.
In the face of ambxiguity, refuse the temptation to guess.
There should be one --- and preferably only one -- obvious way to do it.
Although the way may not be obvious at first unless you are a Dutch.
Now is better than never.
Although never is better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it's maybe a good idea.
Namespaces are one honking great idea --- let's do more of those!
美丽胜过丑陋。
显式优于隐式。
简单比复杂更好。
复杂比复杂更好。
优于嵌套。
稀疏优于密集。
可读性很重要。
特殊情况不足以打破规则。
虽然实用性胜过纯洁。
错误不应该默默地传递。
除非明确沉默。
面对困惑,拒绝猜测的诱惑。
应该有一个 - 最好只有一个 - 明显的方法来做到这一点。
虽然这种方式起初可能并不明显,除非你是荷兰人。
现在比永远好。
虽然现在永远不会比*正确好。
如果实施很难解释,这是一个坏主意。
如果实现很容易解释,那可能是个好主意。
命名空间是一个很棒的主意 - 让我们做更多的事情吧!
'''
import re
import collections
def stats_text_cn(text):
    """Print and return the frequency of each Chinese character in *text*.

    Only characters in the range U+4E00..U+9FA5 are counted.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each matched character to its
        number of occurrences. The same counter is printed to standard
        output.
    """
    # findall() yields one single-character string per match, so the list can
    # be counted directly; there is no need to join it back into a string.
    chars = re.findall('[\u4e00-\u9fa5]', text)
    counts = collections.Counter(chars)
    print(counts)
    return counts

def stats_text_en(text):
    """Print and return the frequency of each English word in *text*.

    A word is a maximal run of ASCII letters (``A-Z``/``a-z``); every other
    character acts as a separator. Counting is case-sensitive.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences. The same counter is printed to standard output.
    """
    # Matching letter runs directly is equivalent to replacing every
    # non-letter with a space and then splitting on whitespace.
    words = re.findall(r'[A-Za-z]+', text)
    counts = collections.Counter(words)
    print(counts)
    return counts

def stats_text(text):
    """Run the Chinese and then the English frequency statistics on *text*.

    Both helpers receive the same, unmodified *text*. Nothing is returned.
    """
    for report in (stats_text_cn, stats_text_en):
        report(text)

20 changes: 20 additions & 0 deletions exercises/1901100229/d08/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from mymodule.stats_word import stats_text_cn as cn

# A list rather than a string: used to exercise the ValueError handling below.
text = [2, 3, 4, 5]

try:
    cn(text)
except ValueError as err:
    # Report the rejected input instead of letting the script crash.
    print(err)












27 changes: 27 additions & 0 deletions exercises/1901100229/d08/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
text = [2,3,4,5]
import re
import collections
def stats_text_cn(text):
    """Print and return the frequency of each Chinese character in *text*.

    Only characters in the range U+4E00..U+9FA5 are counted.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each matched character to its
        number of occurrences. The same counter is printed to standard
        output.

    Raises:
        ValueError: If *text* is not a string.
    """
    # isinstance() also accepts str subclasses, which an exact type()
    # comparison would reject.
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")
    # findall() yields one single-character string per match, so the list can
    # be counted directly; there is no need to join it back into a string.
    chars = re.findall('[\u4e00-\u9fa5]', text)
    counts = collections.Counter(chars)
    print(counts)
    return counts

def stats_text_en(text):
    """Print and return the frequency of each English word in *text*.

    A word is a maximal run of ASCII letters (``A-Z``/``a-z``); every other
    character acts as a separator. Counting is case-sensitive.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences. The same counter is printed to standard output.

    Raises:
        ValueError: If *text* is not a string.
    """
    # isinstance() also accepts str subclasses, which an exact type()
    # comparison would reject.
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")
    # Matching letter runs directly is equivalent to replacing every
    # non-letter with a space and then splitting on whitespace.
    words = re.findall(r'[A-Za-z]+', text)
    counts = collections.Counter(words)
    print(counts)
    return counts

def stats_text(text):
    """Run the Chinese and then the English frequency statistics on *text*.

    The input is validated here first, so invalid input is rejected before
    either helper is called.

    Args:
        text: The string to analyse.

    Raises:
        ValueError: If *text* is not a string.
    """
    # isinstance() also accepts str subclasses, which an exact type()
    # comparison would reject.
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")
    stats_text_cn(text)
    stats_text_en(text)


if __name__ == "__main__":
    # ``text`` is defined above as a list, so stats_text() rejects it with a
    # ValueError. Running this only when the file is executed directly keeps
    # the error from firing when the module is imported, and the error is
    # reported rather than left to crash the script.
    try:
        stats_text(text)
    except ValueError as err:
        print(err)