Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions exercises/1901100283/1001S02E06_stats_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@ def stats_text_en(text):
n_dic[word]=n_dic[word]+1
sorted_dic=sorted(n_dic.items(),key=lambda n_dic:n_dic[1],reverse=True)

return dict(sorted_dic)



return dict(sorted_dic)

print(dict(sorted_dic))




#stats_text_cn 封装统计中文汉字字频的函数
def stats_text_cn(text):
a=list(text)
Expand All @@ -34,9 +39,14 @@ def stats_text_cn(text):
n_dic[word]=n_dic[word]+1
sorted_dic=sorted(n_dic.items(),key=lambda n_dic:n_dic[1],reverse=True)

return dict(sorted_dic)



return dict(sorted_dic)

print(dict(sorted_dic))




16 changes: 16 additions & 0 deletions exercises/1901100283/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
###��ѧѵ��Ӫ-Python14�������ܽ�
####��ô�������ſΣ�
����2018��һ��żȻ��������֪����Python�����ѧϰ�ȸ������ǰ������������ѧϰPython��Ҫô�о�������ѧ�ķ����Ǻ���أ�Ҫô����ֻ��ʵ�٣�û��ϵͳ�ԣ��������ն�����;���Ϸ�����
��2019���ij��������������˷����ˡ��Ƹ�����֮·���Ȿ�飬�����ᵽ�Ѿ������ü��飬ÿ�θ��̶��кܴ��ջ�����һ����ֵ�ö��Ϻü���ض��DZ����飬���Թ���Ҳ�����Ķ���û�뵽�Դӿ����Ȿ��͸о�ͣ��������һ����һ�飬Ŀǰ�ڿ����ı顣��������ܰ��Ȿ���е�ÿһ�㶼�Լ�����ȥʵ���DZض�����һ���dz��˲���ĸ��˳ɳ��������Ȿ������Ȼ�Ĺ�ע����Ц����ʦ�ļ������ںš���������������ʦ��֯��Python��̿γ̣������۸񲻷ơ���ʱ��֪��ѧϰPython����������������ڼ۸�Ϲ�û�����������μӡ�
2019��8��8�գ��������ڹ�˾��һλ��ƹ���ʦ��ͨ�й�Python����˵����Ӵ�У�������ѧϰPython����Ĵ�ѧ��ҵ���ձ�ҵ�����˷dz��õĹ�˾��������н��28000Ԫһ���¡������Ϣ�Ҹе�̫���ˣ��ҹ�����10�������������ˮƽ����Զ��һ����ѧ��ҵ��������ô�߹��ʣ������ѡ��ҵ��רҵ�зdz���Ĺ�ϵ������δ�������ƴ�Ҷ�֪�����˹����ܡ����ѧϰ�������ݷ��򡣼�Ȼ��ˣ�����ʲô���ɲ����ѧϰ��ؼ���Ϊ������׼���أ���һ�죬�ҹ��ϱ����ˡ�Python14�����š��ܷ���2000��Ԫ�Ŀγ̣���Ҳ�ǵ�һ�α���ô������Ρ�һ·�������о�����Ҫ���������θ���һ������dz��ش�����壬���Լ����ò������У�����Ҫ��˭�����Ҳ������Ҿ͸�˭����ͣ�������ĸо���
####��ѧ������͸���
���ſγ̺��������κܲ�ͬ����������Ĵ��������ˣ����ܽ����ĵ����ٱ�Ҫ֪ʶ��1��GithubЭ��ƽ̨ 2��Anaconda���������� 3����̻����﷨ 4���������⡣
�����γ̵�������������ʦ���Ƹ�����д�������ѧ��һ���׷�����ÿ����ҵ�еIJο����Ͼ�����ѧ���ݣ���Ҫ�ύ����ҵ����ʵ�����ݣ�ѧϰ�ʼ����Լ����ܽ��븴�̣������ҵ�����Ƶ�������ٴ�ѧϰ�ͱȶԵĹ��̣�����Ⱥ��һ��ѧϰ�������൱��Ӫ��һ��ѧϰ���罻����������������ѧ����ĸ��ˣ���ʲô�������ֱ����̡�����γ����õķdz�����������һ����������������ѧ����һ���׷�����
�й�Python������ܽ������¼������
����Ƿdz��Ͻ���һ��ѧ�ʡ���ʹ���һ�������ţ����ߴ�Сд��������У��ݲ���һ��С�������Ҫ��ѧϰ��̵���������ϸ�����ġ����ľ���ÿ���˶�Ӧ��ѧϰ������ſγ̡�
�������������Լ������Ҵ𰸡�������ѧ���̾����������ָ��������⣬��һ�������Լ��������Ҵ𰸣���Ҫ���Լ�һ���޶���ʱ�䣬������ʱ�����Լ������ܽ�����⣬�ǾͿ�������ص��˰�æ���
����Ҫ��������̻������ܶ���֪ʶ�㣬�������⺺��˵������ÿһ��������֪ʶ�㣬��ѧϰʱ��һ�����Ǹ���Ҫ������������˼·�ͻ᲻�����
ͬһ����������ж��ֽ��˼·���ڿ���������һʱ���뾡���ܶ�Ľ��˼·��ѡ��һ��˼·��ʼ��̺�����;���������Լ�ʵ���޷����ʱ�������Գ��������Ľ��˼·��̡�
��̷dz������߼��������ڱ��ʱ�߼�Ҳ�Ƿdz�����������һ��Ҫ�dz������������̵Ĺ����л��ߺܶ���·��
��Ҫ��Ϊ�Ǻ��Լ��޹ء�ÿ����ҵ��ɺ�����Ƶ���⣬ÿ����Ҳ���ῴ�����ǿ�����һ���û�ж��Լ�ԭ�ȵ���ҵ�޸Ĺ�����Ϊ�����Լ�����д����Ҳ�����ҵ�ˡ�ֱ��ijһ����ҵ�е���ģ��ʱ�������ˣ��������������Ƶ�������ᵽ��ֹһ�Σ����ҵ���ȴ�벻������������̽������ͻȻ�������Ƕ�ô�ֲ���һ���£�����Ϊ�����ֺ��Լ��޹ص��뷨��Ȼ�������Ĵ����˺ü���ѧϰij������Ļ��ᣡ
����һ�ηdz�������ѧ�������dz���л��Ц����ʦ����л�����Լ������������ҵ�ͬѧ�ǣ��һ������Լ�����Ȥ��Python�����ϼ���ǰ�У�
6 changes: 6 additions & 0 deletions exercises/1901100283/d09/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from stats_word import stats_text

# text is an int here, not a str, so this run exercises the ValueError branch
# that stats_text raises for non-string input.
text = 123
try:
    result = stats_text(text)
    print(result)
except ValueError:
    print('请重新输入字符串内容!')
45 changes: 45 additions & 0 deletions exercises/1901100283/d09/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#stats_text_en 封装统计英文单词词频的函数
def stats_text_en(text, n=None):
    """Count how often each English word occurs in ``text``.

    The text is split on whitespace and only tokens made up entirely of
    ASCII letters are counted. A token with attached punctuation (``"word,"``),
    digits, or non-Latin characters such as Chinese is ignored.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return. When omitted
            (``None``) the number is read interactively from stdin, which is
            what callers passing only ``text`` get.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``, or if the value typed at
            the prompt cannot be converted to ``int``.
    """
    import re
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # str.isalpha() is Unicode-aware and would also accept Chinese tokens,
    # so match ASCII letters explicitly.
    english_word = re.compile(r'[A-Za-z]+')
    words = [token for token in text.split() if english_word.fullmatch(token)]

    if n is None:
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(Counter(words).most_common(n))

#stats_text_cn 封装统计中文汉字字频的函数
def stats_text_cn(text, n=None):
    """Count how often each Chinese character occurs in ``text``.

    Every character in the range U+4E00..U+9FA5 is counted individually;
    all other characters are ignored.

    Args:
        text: The string to analyse.
        n: How many of the most frequent characters to return. When omitted
            (``None``) the number is read interactively from stdin, which is
            what callers passing only ``text`` get.

    Returns:
        A dict mapping each selected character to its count, most frequent
        first.

    Raises:
        ValueError: If ``text`` is not a ``str``, or if the value typed at
            the prompt cannot be converted to ``int``.
    """
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    hanzi = [ch for ch in text if '\u4e00' <= ch <= '\u9fa5']

    if n is None:
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(Counter(hanzi).most_common(n))

#stats_text 分别调用stats_text_en , stats_text_cn ,输出合并词频统计结果
def stats_text(text):
    """Return the English and Chinese frequency tables of ``text`` as one dict.

    Calls ``stats_text_en`` first and ``stats_text_cn`` second; if both
    report the same key, the Chinese count replaces the English one.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        print('您输入的内容不是字符串类型!')
        raise ValueError
    english_counts = stats_text_en(text)
    chinese_counts = stats_text_cn(text)
    merged = dict(english_counts)
    merged.update(chinese_counts)
    return merged

#读取本地文件,进行词频统计
# NOTE(review): this still runs at import time, so `from stats_word import ...`
# also reads the file and triggers the prompt inside stats_text_cn.
import json
from pathlib import Path

# Locate the data file next to this module rather than through a
# machine-specific absolute path, so the script works from any checkout.
tang300_path = Path(__file__).resolve().parent / 'tang300.json'
with open(tang300_path, 'r', encoding='utf-8') as t:
    read_str = t.read()
json_list = json.loads(read_str)
# The parsed JSON is stringified as a whole; stats_text_cn keeps only the
# Chinese characters from that text.
print(stats_text_cn(str(json_list)))

6 changes: 6 additions & 0 deletions exercises/1901100283/d10/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from stats_word import stats_text

# text is an int here, not a str, so this run exercises the ValueError branch
# that stats_text raises for non-string input.
text = 123
try:
    result = stats_text(text)
    print(result)
except ValueError:
    print('请重新输入字符串内容!')
46 changes: 46 additions & 0 deletions exercises/1901100283/d10/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#stats_text_en 封装统计英文单词词频的函数
def stats_text_en(text, n=None):
    """Count how often each English word occurs in ``text``.

    The text is split on whitespace and only tokens made up entirely of
    ASCII letters are counted. A token with attached punctuation (``"word,"``),
    digits, or non-Latin characters such as Chinese is ignored.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return. When omitted
            (``None``) the number is read interactively from stdin, which is
            what callers passing only ``text`` get.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``, or if the value typed at
            the prompt cannot be converted to ``int``.
    """
    import re
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # str.isalpha() is Unicode-aware and would also accept Chinese tokens,
    # so match ASCII letters explicitly.
    english_word = re.compile(r'[A-Za-z]+')
    words = [token for token in text.split() if english_word.fullmatch(token)]

    if n is None:
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(Counter(words).most_common(n))

#stats_text_cn 封装统计中文汉字字频的函数
import jieba
def stats_text_cn(text, n=None):
    """Count how often each Chinese word (two or more characters) occurs.

    ``text`` is segmented with ``jieba.lcut``. A segment is counted only if
    it has at least two characters and every character lies in the range
    U+4E00..U+9FA5.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return. When omitted
            (``None``) the number is read interactively from stdin, which is
            what callers passing only ``text`` get.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``, or if the value typed at
            the prompt cannot be converted to ``int``.
    """
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # Comparing a whole multi-character segment against '\u4e00'/'\u9fa5' is
    # a lexicographic string comparison that effectively tests only the first
    # character, so check every character individually.
    words = [
        segment
        for segment in jieba.lcut(text)
        if len(segment) >= 2
        and all('\u4e00' <= ch <= '\u9fa5' for ch in segment)
    ]

    if n is None:
        n = int(input('请输入您需要的中文词长度大于等于2的词频最高的前n个词的具体数值:'))
    return dict(Counter(words).most_common(n))

#stats_text 分别调用stats_text_en , stats_text_cn ,输出合并词频统计结果
def stats_text(text):
    """Return the English and Chinese frequency tables of ``text`` as one dict.

    Calls ``stats_text_en`` first and ``stats_text_cn`` second; if both
    report the same key, the Chinese count replaces the English one.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        print('您输入的内容不是字符串类型!')
        raise ValueError
    english_counts = stats_text_en(text)
    chinese_counts = stats_text_cn(text)
    merged = dict(english_counts)
    merged.update(chinese_counts)
    return merged

#读取本地文件,进行词频统计
# NOTE(review): this still runs at import time, so `from stats_word import ...`
# also reads the file and triggers the prompt inside stats_text_cn.
import json
from pathlib import Path

# Locate the data file next to this module rather than through a
# machine-specific absolute path, so the script works from any checkout.
tang300_path = Path(__file__).resolve().parent / 'tang300.json'
with open(tang300_path, 'r', encoding='utf-8') as t:
    read_str = t.read()
json_list = json.loads(read_str)
# The parsed JSON is stringified as a whole; stats_text_cn segments that text
# and keeps only the Chinese words.
print(stats_text_cn(str(json_list)))

6 changes: 6 additions & 0 deletions exercises/1901100283/d11/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from stats_word import stats_text

# text is an int here, not a str, so this run exercises the ValueError branch
# that stats_text raises for non-string input.
text = 123
try:
    result = stats_text(text)
    print(result)
except ValueError:
    print('请重新输入字符串内容!')
54 changes: 54 additions & 0 deletions exercises/1901100283/d11/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#stats_text_en 封装统计英文单词词频的函数
def stats_text_en(text, n=None):
    """Count how often each English word occurs in ``text``.

    The text is split on whitespace and only tokens made up entirely of
    ASCII letters are counted. A token with attached punctuation (``"word,"``),
    digits, or non-Latin characters such as Chinese is ignored.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return. When omitted
            (``None``) the number is read interactively from stdin, which is
            what callers passing only ``text`` get.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``, or if the value typed at
            the prompt cannot be converted to ``int``.
    """
    import re
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # str.isalpha() is Unicode-aware and would also accept Chinese tokens,
    # so match ASCII letters explicitly.
    english_word = re.compile(r'[A-Za-z]+')
    words = [token for token in text.split() if english_word.fullmatch(token)]

    if n is None:
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(Counter(words).most_common(n))

#stats_text_cn 封装统计中文汉字字频的函数
import jieba
def stats_text_cn(text, n=100):
    """Count how often each Chinese word (two or more characters) occurs.

    ``text`` is segmented with ``jieba.lcut``. A segment is counted only if
    it has at least two characters and every character lies in the range
    U+4E00..U+9FA5.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return; defaults to 100.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # Comparing a whole multi-character segment against '\u4e00'/'\u9fa5' is
    # a lexicographic string comparison that effectively tests only the first
    # character, so check every character individually.
    words = [
        segment
        for segment in jieba.lcut(text)
        if len(segment) >= 2
        and all('\u4e00' <= ch <= '\u9fa5' for ch in segment)
    ]
    return dict(Counter(words).most_common(n))

#stats_text 分别调用stats_text_en , stats_text_cn ,输出合并词频统计结果
def stats_text(text):
    """Return the English and Chinese frequency tables of ``text`` as one dict.

    Calls ``stats_text_en`` first and ``stats_text_cn`` second; if both
    report the same key, the Chinese count replaces the English one.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        print('您输入的内容不是字符串类型!')
        raise ValueError
    english_counts = stats_text_en(text)
    chinese_counts = stats_text_cn(text)
    merged = dict(english_counts)
    merged.update(chinese_counts)
    return merged

#通过网络请求获得网页内容,使⽤分词工具对中文字符串进行分词,统计词频,得出结果,发送给到指定的邮箱
# NOTE(review): this still runs at import time, so importing stats_word from
# another script also prompts for credentials and sends the mail.
import getpass

import requests
import yagmail
from pyquery import PyQuery

sender = input('请输入发件人邮箱:')
# getpass keeps the password from being echoed to the terminal
password = getpass.getpass('输入发件人邮箱密码(可复制粘贴):')
recipient = input('请输入收件人邮箱:')

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status stops an HTTP error page from being analysed and mailed.
response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA', timeout=30)
response.raise_for_status()
document = PyQuery(response.text)
# Only the text of the element with id "js_content" is analysed
content = document('#js_content').text()
body = str(stats_text_cn(content))

yag = yagmail.SMTP(sender, password, 'smtp.126.com')
yag.send(recipient, '1901100283 自学训练营学习19群 Day11 PerryZ10', body)
16 changes: 16 additions & 0 deletions exercises/1901100283/d12/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#将实战项⽬目1的功能包装集成到个⼈人微信
from wxpy import *

bot = Bot()
my_friend = bot.friends()


@bot.register(my_friend, SHARING)
def get_friend_url(msg):
    """Reply to a shared article with the word-frequency table of its text.

    Registered for SHARING messages from the bot's friends. The page at
    ``msg.url`` is downloaded, the text of its ``#js_content`` element is
    passed to ``stats_text_cn``, and the stringified result is sent back
    with ``msg.reply``.
    """
    # The message type is compared by equality: passing the SHARING constant
    # as the class argument of isinstance() raises TypeError when it is not
    # a type, so the original check could never succeed.
    if msg.type == SHARING:
        import requests
        from pyquery import PyQuery
        from stats_word import stats_text_cn

        # Bound the wait and reject HTTP error pages before parsing
        response = requests.get(msg.url, timeout=30)
        response.raise_for_status()
        document = PyQuery(response.text)
        content = document('#js_content').text()
        body = str(stats_text_cn(content))
        msg.reply(body)


embed()
54 changes: 54 additions & 0 deletions exercises/1901100283/d12/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#stats_text_en 封装统计英文单词词频的函数
def stats_text_en(text, n=None):
    """Count how often each English word occurs in ``text``.

    The text is split on whitespace and only tokens made up entirely of
    ASCII letters are counted. A token with attached punctuation (``"word,"``),
    digits, or non-Latin characters such as Chinese is ignored.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return. When omitted
            (``None``) the number is read interactively from stdin, which is
            what callers passing only ``text`` get.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``, or if the value typed at
            the prompt cannot be converted to ``int``.
    """
    import re
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # str.isalpha() is Unicode-aware and would also accept Chinese tokens,
    # so match ASCII letters explicitly.
    english_word = re.compile(r'[A-Za-z]+')
    words = [token for token in text.split() if english_word.fullmatch(token)]

    if n is None:
        n = int(input('请输入您需要的词频最高的前n个词的具体数值:'))
    return dict(Counter(words).most_common(n))

#stats_text_cn 封装统计中文汉字字频的函数
import jieba
def stats_text_cn(text, n=100):
    """Count how often each Chinese word (two or more characters) occurs.

    ``text`` is segmented with ``jieba.lcut``. A segment is counted only if
    it has at least two characters and every character lies in the range
    U+4E00..U+9FA5.

    Args:
        text: The string to analyse.
        n: How many of the most frequent words to return; defaults to 100.

    Returns:
        A dict mapping each selected word to its count, most frequent first.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    from collections import Counter

    if not isinstance(text, str):
        message = '您输入的内容不是字符串类型!'
        print(message)
        raise ValueError(message)

    # Comparing a whole multi-character segment against '\u4e00'/'\u9fa5' is
    # a lexicographic string comparison that effectively tests only the first
    # character, so check every character individually.
    words = [
        segment
        for segment in jieba.lcut(text)
        if len(segment) >= 2
        and all('\u4e00' <= ch <= '\u9fa5' for ch in segment)
    ]
    return dict(Counter(words).most_common(n))

#stats_text 分别调用stats_text_en , stats_text_cn ,输出合并词频统计结果
def stats_text(text):
    """Return the English and Chinese frequency tables of ``text`` as one dict.

    Calls ``stats_text_en`` first and ``stats_text_cn`` second; if both
    report the same key, the Chinese count replaces the English one.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        print('您输入的内容不是字符串类型!')
        raise ValueError
    english_counts = stats_text_en(text)
    chinese_counts = stats_text_cn(text)
    merged = dict(english_counts)
    merged.update(chinese_counts)
    return merged

#通过网络请求获得网页内容,使⽤分词工具对中文字符串进行分词,统计词频,得出结果,发送给到指定的邮箱
# NOTE(review): this still runs at import time, so importing stats_word from
# another script also prompts for credentials and sends the mail.
import getpass

import requests
import yagmail
from pyquery import PyQuery

sender = input('请输入发件人邮箱:')
# getpass keeps the password from being echoed to the terminal
password = getpass.getpass('输入发件人邮箱密码(可复制粘贴):')
recipient = input('请输入收件人邮箱:')

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status stops an HTTP error page from being analysed and mailed.
response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA', timeout=30)
response.raise_for_status()
document = PyQuery(response.text)
# Only the text of the element with id "js_content" is analysed
content = document('#js_content').text()
body = str(stats_text_cn(content))

yag = yagmail.SMTP(sender, password, 'smtp.126.com')
yag.send(recipient, '1901100283 自学训练营学习19群 Day11 PerryZ10', body)
48 changes: 48 additions & 0 deletions exercises/1901100283/d13/mymodule/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#将实战项⽬目1的功能包装集成到个⼈人微信
# NOTE(review): the string literal below holds the wxpy message handler with
# its code disabled by wrapping it in quotes; none of it is executed.
'''from wxpy import *
bot = Bot()
my_friend=bot.friends()
@bot.register(my_friend,SHARING)
def get_friend_url(msg):
if isinstance(msg.type,SHARING):
import requests
response=requests.get(msg.url)
from pyquery import PyQuery
document=PyQuery(response.text)
content=document('#js_content').text()
from stats_word import stats_text_cn
body=str(stats_text_cn(content))
msg.reply(body)
embed()
'''
#Day11作业转化成图表
import requests
from pyquery import PyQuery

# NOTE(review): np and FuncFormatter are not used below; the imports are kept
# in case other code relies on them.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from pylab import mpl

from stats_word import stats_text_cn

# Bound the wait and reject HTTP error pages before parsing
response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA', timeout=30)
response.raise_for_status()
document = PyQuery(response.text)
# Only the text of the element with id "js_content" is analysed
content = document('#js_content').text()

# Apply the style sheet before the figure exists so it affects the whole
# plot. Newer Matplotlib releases ship the seaborn styles under a
# "seaborn-v0_8-" prefix, so use whichever name this installation offers.
for style_name in ('seaborn-v0_8-paper', 'seaborn-paper'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
# A font with CJK glyphs is needed for the Chinese labels to render
mpl.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams.update({'figure.autolayout': True})

data = stats_text_cn(content)
group_data = list(data.values())
group_names = list(data.keys())

fig, ax = plt.subplots()
ax.barh(group_names, group_data)
labels = ax.get_xticklabels()
plt.setp(labels, horizontalalignment='right')
ax.set(xlabel='词频', ylabel='中文汉字', title='中文汉字词频统计')
plt.show()





Loading