Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions exercises/1901010161/1001S02E03_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,9 @@ def calculate():
/ for division
''')

#<<<<<<< master
# <<<<<<< master
number_1 = float(input('Please enter the first number: '))
number_2 = float(input('Please enter the second number: '))
#=======
number_1 = int(input('Please enter the first number: '))
number_2 = int(input('Please enter the second number: '))
#>>>>>>> master

if operation == '+':
print('{} + {} = '.format(number_1, number_2))
Expand Down
3 changes: 3 additions & 0 deletions exercises/1901010161/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
���������˵�14��������ʱ���ܽ�һ���Լ���ѧϰ����ˡ�
���ʼ��Python��һ����֪�����ڵ�һ֪��⣬���ʼ��װ��Ҫ���ںü������ص����ڶԸ������и��ֳ���Python��ѧϰ�ܹ�������������һ���ȡ��뵱������py�ļ�����֪��Ӧ����ô�����������ٶ��ڸ������ʶ�����ôİ���ˡ�����������У���л��λ��������飬�������̾��˽��������Һܶ�ָ����Ҳ���������˲������ġ���������ܶ�д����ÿ����ҵ֮�󡣻�ͷ��ȥ������ܿ����Լ��ijɳ���
�Ժ��һ�����ʵ�ʳ������ñ�̴����������ı仯��
File renamed without changes.
36 changes: 36 additions & 0 deletions exercises/1901010161/d11/00.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import requests
import getpass
import yagmail
from pyquery import PyQuery as py
from mymodule import stats_word

# Download the WeChat article. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops it from analysing an
# HTTP error page as if it were the article.
response = requests.get(
    'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA',
    timeout=30,
)
response.raise_for_status()

# Extract the text of the element with id "js_content" from the page.
document = py(response.text)
content = document('#js_content').text()
print('content=', content)

# Top 100 Chinese words, converted to a string so it can be the mail body.
w_list = str(stats_word.stats_text_cn(content, 100))
print(w_list)

# Ask for the sender credentials interactively; getpass hides the input.
sender = input("请输入发件人邮箱:")
password = getpass.getpass("输入发件人邮箱授权码:")

# NOTE(review): per the yagmail docs this saves the credentials in the
# system keyring; kept so the script's side effects stay unchanged.
yagmail.register(sender, password)

yag = yagmail.SMTP(sender, password, host='smtp.qq.com')
yag.send('pythoncamp@163.com', '【1901010161】自学训练营学习2群DAY11 Zezhou-Sun', w_list)
print("发送成功")
8 changes: 8 additions & 0 deletions exercises/1901010161/d11/1111.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"folders": [
{
"path": "D:\\用户目录\\我的文档\\GitHub\\selfteaching-python-camp\\exercises\\1901010161"
}
],
"settings": {}
}
12 changes: 12 additions & 0 deletions exercises/1901010161/d11/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from mymodule import stats_word

from pathlib import Path

# Locate the data file relative to this script instead of through an
# absolute path that only exists on one machine.
path = Path(__file__).resolve().parent / 'mymodule' / 'tang300.json'
with open(path, 'r', encoding='UTF-8') as f:
    read_data = f.read()

# stats_text_cn raises ValueError when its argument is not a string.
try:
    print('出现频率最高的前20个词: \n', stats_word.stats_text_cn(read_data, 20))
except ValueError:
    print('ValueError:type of argument is not string!')
35 changes: 35 additions & 0 deletions exercises/1901010161/d11/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import re # 调用正则表达式
import collections
import jieba
# Module-level default of zero; every function below takes its own ``count``
# parameter, which shadows this name inside the function body.
count = 0


def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    A word is a maximal run of ASCII letters; every other character (digits,
    punctuation, Chinese characters, whitespace) acts as a separator.
    Counting is case-sensitive.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    # Collecting letter runs directly is equivalent to replacing non-letters
    # with spaces and splitting on whitespace.
    words = re.findall(r'[A-Za-z]+', text)
    return collections.Counter(words).most_common(count)


def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Only characters in the range U+4E00..U+9FA5 are kept. They are joined
    into one string, segmented with ``jieba.lcut``, and segments shorter
    than two characters are discarded before counting.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    # NOTE(review): the kept characters are concatenated before segmentation,
    # so characters separated by punctuation in the source become adjacent;
    # confirm this is the intended behaviour.
    han_only = "".join(re.findall(r'[\u4e00-\u9fa5]', text))
    words = [word for word in jieba.lcut(han_only) if len(word) >= 2]
    return collections.Counter(words).most_common(count)


def stats_text(text, count):
    """Return English and Chinese word frequencies for ``text``.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries requested from each of the two statistics.

    Returns:
        The list returned by ``stats_text_en`` followed by the list returned
        by ``stats_text_cn``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    return stats_text_en(text, count) + stats_text_cn(text, count)
36 changes: 36 additions & 0 deletions exercises/1901010161/d12/00.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import requests
import getpass
import yagmail
from pyquery import PyQuery as py
from mymodule import stats_word

# Download the WeChat article. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops it from analysing an
# HTTP error page as if it were the article.
response = requests.get(
    'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA',
    timeout=30,
)
response.raise_for_status()

# Extract the text of the element with id "js_content" from the page.
document = py(response.text)
content = document('#js_content').text()
print('content=', content)

# Top 100 Chinese words, converted to a string so it can be the mail body.
w_list = str(stats_word.stats_text_cn(content, 100))
print(w_list)

# Ask for the sender credentials interactively; getpass hides the input.
sender = input("请输入发件人邮箱:")
password = getpass.getpass("输入发件人邮箱授权码:")

# NOTE(review): per the yagmail docs this saves the credentials in the
# system keyring; kept so the script's side effects stay unchanged.
yagmail.register(sender, password)

yag = yagmail.SMTP(sender, password, host='smtp.qq.com')
yag.send('pythoncamp@163.com', '【1901010161】自学训练营学习2群DAY11 Zezhou-Sun', w_list)
print("发送成功")
8 changes: 8 additions & 0 deletions exercises/1901010161/d12/1111.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"folders": [
{
"path": "D:\\用户目录\\我的文档\\GitHub\\selfteaching-python-camp\\exercises\\1901010161"
}
],
"settings": {}
}
31 changes: 31 additions & 0 deletions exercises/1901010161/d12/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from mymodule import stats_word
from wxpy import Bot, Message, embed
from pyquery import PyQuery
import requests

from pathlib import Path

bot = Bot()  # Initialise the bot; login is done by scanning a QR code.
my_friend = bot.friends().search('闫')[0]  # First friend matching the keyword.
my_friend.send('分享任意微信文章给我')  # Ask the friend to share an article.


# Listen for messages from my_friend and reply to them.
@bot.register(my_friend)
def reply_my_friend(msg):
    """Reply to a shared article with its top 100 Chinese words.

    Messages whose type is not 'Sharing' are ignored (returns None).
    """
    if msg.type != 'Sharing':
        return None
    # Bound the request time and refuse to analyse an HTTP error page.
    response = requests.get(msg.url, timeout=30)
    response.raise_for_status()
    document = PyQuery(response.text)
    content = document('#js_content').text()
    return stats_word.stats_text_cn(content, 100)


embed()

# Locate the d11 data file relative to this script instead of through an
# absolute path that only exists on one machine.
path = Path(__file__).resolve().parent.parent / 'd11' / 'mymodule' / 'tang300.json'
with open(path, 'r', encoding='UTF-8') as f:
    read_data = f.read()

# stats_text_cn raises ValueError when its argument is not a string.
try:
    print('出现频率最高的前20个词: \n', stats_word.stats_text_cn(read_data, 20))
except ValueError:
    print('ValueError:type of argument is not string!')
35 changes: 35 additions & 0 deletions exercises/1901010161/d12/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import re # 调用正则表达式
import collections
import jieba
# Module-level default of zero; every function below takes its own ``count``
# parameter, which shadows this name inside the function body.
count = 0


def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    A word is a maximal run of ASCII letters; every other character (digits,
    punctuation, Chinese characters, whitespace) acts as a separator.
    Counting is case-sensitive.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    # Collecting letter runs directly is equivalent to replacing non-letters
    # with spaces and splitting on whitespace.
    words = re.findall(r'[A-Za-z]+', text)
    return collections.Counter(words).most_common(count)


def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Only characters in the range U+4E00..U+9FA5 are kept. They are joined
    into one string, segmented with ``jieba.lcut``, and segments shorter
    than two characters are discarded before counting.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    # NOTE(review): the kept characters are concatenated before segmentation,
    # so characters separated by punctuation in the source become adjacent;
    # confirm this is the intended behaviour.
    han_only = "".join(re.findall(r'[\u4e00-\u9fa5]', text))
    words = [word for word in jieba.lcut(han_only) if len(word) >= 2]
    return collections.Counter(words).most_common(count)


def stats_text(text, count):
    """Return English and Chinese word frequencies for ``text``.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries requested from each of the two statistics.

    Returns:
        The list returned by ``stats_text_en`` followed by the list returned
        by ``stats_text_cn``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    return stats_text_en(text, count) + stats_text_cn(text, count)
8 changes: 8 additions & 0 deletions exercises/1901010161/d13/1111.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"folders": [
{
"path": "D:\\用户目录\\我的文档\\GitHub\\selfteaching-python-camp\\exercises\\1901010161"
}
],
"settings": {}
}
29 changes: 29 additions & 0 deletions exercises/1901010161/d13/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
from mymodule import stats_word
from pyquery import PyQuery as py
import requests
import matplotlib.pyplot as plt

# Download the WeChat article. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops it from charting an
# HTTP error page as if it were the article.
response = requests.get(
    'https://mp.weixin.qq.com/s/_oFklhozwgz_1QnB_pLioA',
    timeout=30,
)
response.raise_for_status()
document = py(response.text)
content = document('#js_content').text()

# Top 10 Chinese words as a {word: frequency} mapping; dict preserves the
# most-frequent-first order of the list it is built from.
w_list = dict(stats_word.stats_text_cn(content, 10))
group_data = tuple(w_list.values())
group_names = list(w_list.keys())

# The labels and title below are Chinese, so select a font that contains
# CJK glyphs (the same font the bot script in this exercise directory uses).
plt.rcParams['font.sans-serif'] = ['SimHei']

fig, ax = plt.subplots()  # One figure holding a single axes.
y_pos = np.arange(len(group_names))
ax.barh(y_pos, group_data, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(group_names)
ax.invert_yaxis()  # Put the most frequent word at the top of the chart.
ax.set_xlabel('词频')
ax.set_title('网页中TOP10中文词语')
plt.show()
35 changes: 35 additions & 0 deletions exercises/1901010161/d13/mymodule/stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import re # 调用正则表达式
import collections
import jieba
# Module-level default of zero; every function below takes its own ``count``
# parameter, which shadows this name inside the function body.
count = 0


def stats_text_en(text, count):
    """Return the ``count`` most frequent English words in ``text``.

    A word is a maximal run of ASCII letters; every other character (digits,
    punctuation, Chinese characters, whitespace) acts as a separator.
    Counting is case-sensitive.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    # Collecting letter runs directly is equivalent to replacing non-letters
    # with spaces and splitting on whitespace.
    words = re.findall(r'[A-Za-z]+', text)
    return collections.Counter(words).most_common(count)


def stats_text_cn(text, count):
    """Return the ``count`` most frequent Chinese words in ``text``.

    Only characters in the range U+4E00..U+9FA5 are kept. They are joined
    into one string, segmented with ``jieba.lcut``, and segments shorter
    than two characters are discarded before counting.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries to return, passed to
            ``Counter.most_common``.

    Returns:
        A list of ``(word, frequency)`` tuples, most frequent first.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    # NOTE(review): the kept characters are concatenated before segmentation,
    # so characters separated by punctuation in the source become adjacent;
    # confirm this is the intended behaviour.
    han_only = "".join(re.findall(r'[\u4e00-\u9fa5]', text))
    words = [word for word in jieba.lcut(han_only) if len(word) >= 2]
    return collections.Counter(words).most_common(count)


def stats_text(text, count):
    """Return English and Chinese word frequencies for ``text``.

    Args:
        text: Text to analyse. Must be a ``str`` (subclasses are accepted).
        count: Number of entries requested from each of the two statistics.

    Returns:
        The list returned by ``stats_text_en`` followed by the list returned
        by ``stats_text_cn``.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('type of argument is not string')
    return stats_text_en(text, count) + stats_text_cn(text, count)
65 changes: 65 additions & 0 deletions exercises/1901010161/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# 导入模块
from pylab import *
import matplotlib.pyplot as plt
import numpy as np
from wxpy import *
import yagmail
import requests
from pyquery import PyQuery
import getpass
import logging
from mymodule import stats_word

# 安装依赖包 requests yagmail pyquery lxml
# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple requests yagmail pyquery lxml

# Configure the root logger: DEBUG level, each record tagged with the
# source file name and line number.
logging.basicConfig(
    level=logging.DEBUG,
    format='file:%(filename)s|line:%(lineno)s|message:%(message)s',
)

def get_article(url):
    """Download ``url`` and return the text of its ``#js_content`` element.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text content of the element with id ``js_content``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Bound the request time so a stalled connection cannot block the caller,
    # and refuse to parse an HTTP error page as if it were the article.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    document = PyQuery(response.text)
    return document('#js_content').text()

def main():
    """Run a WeChat bot that answers shared articles with a word-frequency chart.

    Logs in, looks up one friend, and registers a handler for that friend's
    messages. For each shared article the handler charts the ten most
    frequent Chinese words and replies with the image. ``embed()`` then keeps
    the process alive.
    """
    # Initialise the bot; login is done by scanning a QR code.
    bot = Bot()

    # First friend matching the search keyword.
    my_friends = bot.friends().search('金融家')[0]

    # Handle messages from my_friends and read the shared link from them.
    @bot.register(chats=my_friends)
    def proc(msg):
        try:
            # Only shared articles carry a URL worth fetching.
            if msg.type != 'Sharing':
                return
            logging.info('sharing url - %s', msg.url)
            article = get_article(msg.url)
            count = 10
            # stats_text_cn returns a list of (word, frequency) tuples;
            # convert it to a dict so the words and counts can be split.
            result = dict(stats_word.stats_text_cn(article, count))

            group_data = list(result.values())
            group_names = list(result.keys())

            y_pos = np.arange(len(group_names))

            plt.rcdefaults()
            # The labels are Chinese, so select a font with CJK glyphs.
            plt.rcParams['font.sans-serif'] = ['SimHei']
            fig, ax = plt.subplots()
            try:
                ax.barh(y_pos, group_data, align='center')
                ax.set_yticks(y_pos)
                ax.set_yticklabels(group_names)
                ax.invert_yaxis()  # Most frequent word at the top.
                ax.set_xlabel('词频')
                ax.set_title('网页中TOP10中文词语')
                fig.savefig(r'wordsCnt.jpeg')
            finally:
                # Release the figure; otherwise one figure per message
                # accumulates for the lifetime of the bot.
                plt.close(fig)
            msg.reply_image(r'wordsCnt.jpeg')

        except Exception as e:
            # Log the failure without letting it escape the handler.
            logging.exception(e)

    # Drop into an interactive Python shell, which keeps the program running.
    embed()


if __name__ == '__main__':
    main()