diff --git a/exercises/1901100283/1001S02E06_stats_word.py b/exercises/1901100283/1001S02E06_stats_word.py index 97de3b0eb..20127ac17 100644 --- a/exercises/1901100283/1001S02E06_stats_word.py +++ b/exercises/1901100283/1001S02E06_stats_word.py @@ -13,12 +13,17 @@ def stats_text_en(text): n_dic[word]=n_dic[word]+1 sorted_dic=sorted(n_dic.items(),key=lambda n_dic:n_dic[1],reverse=True) + return dict(sorted_dic) + + + return dict(sorted_dic) print(dict(sorted_dic)) + #stats_text_cn 封装统计中文汉字字频的函数 def stats_text_cn(text): a=list(text) @@ -34,9 +39,14 @@ def stats_text_cn(text): n_dic[word]=n_dic[word]+1 sorted_dic=sorted(n_dic.items(),key=lambda n_dic:n_dic[1],reverse=True) + return dict(sorted_dic) + + + return dict(sorted_dic) print(dict(sorted_dic)) + diff --git a/exercises/1901100283/README.md b/exercises/1901100283/README.md index e69de29bb..f40a59256 100644 --- a/exercises/1901100283/README.md +++ b/exercises/1901100283/README.md @@ -0,0 +1,16 @@ +###ѧѵӪ-Python14ܽ +####ôſΣ +2018һżȻ֪PythonѧϰȸǰѧϰPythonҪôоѧķǺأҪôֻʵ٣ûϵͳԣն;Ϸ +2019ij˷ˡƸ֮·Ȿ飬ᵽѾü飬ÿθ̶кܴջһֵöϺüضDZ飬ԹҲĶû뵽ԴӿⱾ͸оͣһһ飬Ŀǰڿı顣ܰⱾеÿһ㶼ԼȥʵDZضһdz˲ĸ˳ɳⱾȻĹעЦʦļںšʦ֯Python̿γ̣۸񲻷ơʱ֪ѧϰPythonڼ۸Ϲûμӡ +201988գڹ˾һλƹʦͨйPython˵ӴУѧϰPythonĴѧҵձҵ˷dzõĹ˾н28000Ԫһ¡ϢҸе̫ˣҹ10ˮƽԶһѧҵô߹ʣѡҵרҵзdzĹϵδƴҶ֪˹ܡѧϰݷ򡣼ȻˣʲôɲѧϰؼΪ׼أһ죬ҹϱˡPython14šܷ2000ԪĿγ̣ҲǵһαôΡһ·оҪθһdzش壬ԼòУҪ˭ҲҾ͸˭ͣĸо +####ѧ͸ + ſγ̺κܲͬĴˣܽĵٱҪ֪ʶ1GithubЭƽ̨ 2Anaconda 3̻﷨ 4⡣ + γ̵ʦƸдѧһ׷ÿҵеIJοϾѧݣҪύҵʵݣѧϰʼԼܽ븴̣ҵƵٴѧϰͱȶԵḶ́Ⱥһѧϰ൱Ӫһѧϰ罻ѧĸˣʲôֱ̡γõķdzһѧһ׷ + йPythonܽ¼ +ǷdzϽһѧʡʹһţߴСдУݲһСҪѧϰ̵ϸġľÿ˶Ӧѧϰſγ̡ +ԼҴ𰸡ѧָ̾⣬һԼҴ𰸣ҪԼһ޶ʱ䣬ʱԼܽ⣬ǾͿص˰æ +Ҫ̻֪ܶʶ㣬⺺˵ÿһ֪ʶ㣬ѧϰʱһǸҪ˼·ͻ᲻ +ͬһжֽ˼·ڿһʱ뾡ܶĽ˼·ѡһ˼·ʼ̺;Լʵ޷ʱԳĽ˼·̡ +̷dz߼ڱʱ߼ҲǷdzһҪdz̵Ĺлߺܶ· +ҪΪǺԼ޹ءÿҵɺƵ⣬ÿҲῴǿһûжԼԭȵҵ޸ĹΪԼдҲҵˡֱijһҵеģʱˣƵᵽֹһΣҵȴ벻̽ͻȻǶôֲһ£ΪֺԼ޹ص뷨ȻĴ˺üѧϰijĻᣡ +һηdzѧdzлЦʦлԼҵͬѧǣһԼȤPythonϼǰУ diff --git a/exercises/1901100283/d09/mymodule/main.py b/exercises/1901100283/d09/mymodule/main.py new file mode 100644 index 000000000..2d2afc5e4 --- /dev/null +++ b/exercises/1901100283/d09/mymodule/main.py @@ -0,0 +1,6 @@ +from stats_word import stats_text +text=123 +try: + print(stats_text(text)) +except ValueError: 
# stats_text_cn: count how often each Chinese character appears in a string.
def stats_text_cn(text, n=None):
    """Return the ``n`` most frequent Chinese characters found in ``text``.

    Args:
        text: The string to analyse. Only characters between U+4E00 and
            U+9FA5 are counted; every other character is ignored.
        n: Number of top entries to return. When left as ``None`` the
            value is asked for on stdin, which is what the function did
            before this parameter existed.

    Returns:
        A dict mapping each counted character to its number of
        occurrences, inserted in most-common-first order.

    Raises:
        ValueError: If ``text`` is not a ``str`` (the message is also
            printed), or if the interactive answer cannot be parsed by
            ``int()``.
    """
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    counts = Counter(ch for ch in text if '\u4e00' <= ch <= '\u9fa5')
    if n is None:
        # Interactive fallback kept so existing one-argument callers
        # behave exactly as before.
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(counts.most_common(n))
# stats_text_en: count how often each English word appears in a string.
def stats_text_en(text, n=None):
    """Return the ``n`` most frequent English words found in ``text``.

    The text is split on whitespace and a token is counted only when it
    consists purely of ASCII letters. Tokens carrying digits or
    punctuation are skipped, as before. Chinese tokens are now skipped
    too: ``str.isalpha()`` is true for them, so they used to be counted
    as English words.

    Args:
        text: The string to analyse.
        n: Number of top entries to return. When left as ``None`` the
            value is asked for on stdin, which is what the function did
            before this parameter existed.

    Returns:
        A dict mapping each counted word to its number of occurrences,
        inserted in most-common-first order. Counting is case-sensitive.

    Raises:
        ValueError: If ``text`` is not a ``str`` (the message is also
            printed), or if the interactive answer cannot be parsed by
            ``int()``.
    """
    import string
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    ascii_letters = frozenset(string.ascii_letters)
    # str.split() never yields empty tokens, so issuperset() cannot
    # accept an empty word here.
    words = [word for word in text.split() if ascii_letters.issuperset(word)]
    if n is None:
        # Interactive fallback kept so existing one-argument callers
        # behave exactly as before.
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(Counter(words).most_common(n))
# Fetch a web page, segment its Chinese text, count word frequencies and
# e-mail the result to the address the user enters.
if __name__ == '__main__':
    # Guarded so that `from stats_word import stats_text` (as main.py does)
    # no longer prompts for credentials, hits the network or sends mail.
    import getpass

    # Third-party imports stay inside the guard: importing the counting
    # functions should not require requests, pyquery or yagmail.
    import requests
    import yagmail
    from pyquery import PyQuery

    sender = input('请输入发件人邮箱:')
    password = getpass.getpass('输入发件人邮箱密码(可复制粘贴):')
    recipient = input('请输入收件人邮箱:')

    response = requests.get(
        'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA',
        timeout=10,  # do not hang forever on a dead connection
    )
    response.raise_for_status()  # do not e-mail statistics of an error page
    document = PyQuery(response.text)
    content = document('#js_content').text()
    body = str(stats_text_cn(content))

    yag = yagmail.SMTP(sender, password, 'smtp.126.com')
    yag.send(recipient, '1901100283 自学训练营学习19群 Day11 PerryZ10', body)
# stats_text_cn: count how often each Chinese word (two or more characters)
# appears in a string.
def stats_text_cn(text, n=100):
    """Return the ``n`` most frequent Chinese words found in ``text``.

    The text is segmented with ``jieba.lcut``. A segment is counted only
    when it is at least two characters long and every one of its
    characters lies between U+4E00 and U+9FA5. The previous check
    compared the whole segment as a string, which is a lexicographic
    comparison and therefore let mixed segments through as long as they
    started with a Chinese character.

    Args:
        text: The string to analyse.
        n: Number of top entries to return. Defaults to 100, the value
            that used to be hard-coded.

    Returns:
        A dict mapping each counted word to its number of occurrences,
        inserted in most-common-first order.

    Raises:
        ValueError: If ``text`` is not a ``str`` (the message is also
            printed).
    """
    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    from collections import Counter

    import jieba

    words = [
        word
        for word in jieba.lcut(text)
        if len(word) >= 2 and all('\u4e00' <= ch <= '\u9fa5' for ch in word)
    ]
    return dict(Counter(words).most_common(n))
# Day 13: turn the Day 11 word-frequency result into a horizontal bar chart.
# (The disabled copy of the Day 12 WeChat bot that used to sit here as a
# string literal was dropped; the live version is d12/mymodule/main.py.)
import requests
from pyquery import PyQuery

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

from stats_word import stats_text_cn

# Download the article and keep only the text of its '#js_content' element.
response = requests.get(
    'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA',
    timeout=10,  # do not hang forever on a dead connection
)
response.raise_for_status()  # do not chart the words of an error page
document = PyQuery(response.text)
content = document('#js_content').text()

# Apply the style sheet BEFORE the figure exists: style settings are read
# when figures and axes are created, so applying it afterwards had no
# visible effect on this chart.
style_name = 'seaborn-paper'
if style_name not in plt.style.available:
    # NOTE(review): newer Matplotlib ships this style under a renamed
    # identifier -- confirm against the installed version.
    style_name = 'seaborn-v0_8-paper'
plt.style.use(style_name)

# Explicit overrides come after the style sheet so they are not replaced.
# A CJK-capable font is needed for the Chinese labels (assumes
# 'Microsoft YaHei' is installed on the machine).
mpl.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams.update({'figure.autolayout': True})

data = stats_text_cn(content)
group_names = list(data.keys())
group_data = list(data.values())

fig, ax = plt.subplots()
ax.barh(group_names, group_data)
labels = ax.get_xticklabels()
plt.setp(labels, horizontalalignment='right')
ax.set(xlabel='词频', ylabel='中文汉字', title='中文汉字词频统计')
plt.show()
# stats_text: call stats_text_en and stats_text_cn and return the merged
# word-frequency result.
def stats_text(text):
    """Return the combined English and Chinese word counts for ``text``.

    ``stats_text_en`` is evaluated first, then ``stats_text_cn``. When a
    key appears in both results the Chinese count wins.

    Raises:
        ValueError: If ``text`` is not a ``str`` (a message is printed
            before raising).
    """
    if not isinstance(text, str):
        print('您输入的内容不是字符串类型!')
        raise ValueError

    merged = {}
    merged.update(stats_text_en(text))
    merged.update(stats_text_cn(text))
    return merged