selfteaching · XINGRUFANG · Aug 30, 2019 · Aug 30, 2019 · Aug 30, 2019 · Aug 30, 2019
diff --git a/exercises/1901100139/d10/mymodule/stats_word.py b/exercises/1901100139/d10/mymodule/stats_word.py
@@ -1,4 +1,5 @@
 from collections import Counter
+import jieba
 def stats_text_en(text,count):
     elements = text.split()
     words = []
@@ -10,11 +11,13 @@ def stats_text_en(text,count):
             words.append(element)
     return Counter(words).most_common(count)
 def stats_text_cn(text,count):
-    cn_characters = []
-    for character in text:
-        if '\u4e00' <= character <= '\u9fff':
-            cn_characters.append(character)
-    return Counter(cn_characters).most_common(count)
+    """Return the `count` most frequent jieba tokens of `text` longer than one character."""
+    # Tokens come from jieba.cut; anything of length 1 is discarded before
+    # counting, so only multi-character tokens are ranked.
+    long_tokens=(token for token in jieba.cut(text) if len(token)>1)
+    frequencies=Counter(long_tokens)
+    return frequencies.most_common(count)
+
 def stats_text(text,count):
     '''
     合并中英词频的结果

diff --git a/exercises/1901100139/d11/mymodule/stats_word.py b/exercises/1901100139/d11/mymodule/stats_word.py
@@ -1,4 +1,5 @@
 from collections import Counter
+import jieba
 def stats_text_en(text,count):
     elements = text.split()
     words = []
@@ -10,11 +11,13 @@ def stats_text_en(text,count):
             words.append(element)
     return Counter(words).most_common(count)
 def stats_text_cn(text,count):
-    cn_characters = []
-    for character in text:
-        if '\u4e00' <= character <= '\u9fff':
-            cn_characters.append(character)
-    return Counter(cn_characters).most_common(count)
+    """Return the `count` most frequent jieba tokens of `text` longer than one character."""
+    # Tokens come from jieba.cut; anything of length 1 is discarded before
+    # counting, so only multi-character tokens are ranked.
+    long_tokens=(token for token in jieba.cut(text) if len(token)>1)
+    frequencies=Counter(long_tokens)
+    return frequencies.most_common(count)
+
 def stats_text(text,count):
     '''
     合并中英词频的结果

diff --git a/exercises/1901100139/d12/main.py b/exercises/1901100139/d12/main.py
@@ -25,7 +25,7 @@ def handler(msg):
             msg.reply(str(result))
         except Exception as e:
             logging.exception(e)
-    embed            
+    embed()            
 
 
 if __name__=='__main__':

diff --git a/exercises/1901100139/d12/mymodule/stats_word.py b/exercises/1901100139/d12/mymodule/stats_word.py
@@ -1,4 +1,5 @@
 from collections import Counter
+import jieba
 def stats_text_en(text,count):
     elements = text.split()
     words = []
@@ -10,11 +11,13 @@ def stats_text_en(text,count):
             words.append(element)
     return Counter(words).most_common(count)
 def stats_text_cn(text,count):
-    cn_characters = []
-    for character in text:
-        if '\u4e00' <= character <= '\u9fff':
-            cn_characters.append(character)
-    return Counter(cn_characters).most_common(count)
+    """Return the `count` most frequent jieba tokens of `text` longer than one character."""
+    # Tokens come from jieba.cut; anything of length 1 is discarded before
+    # counting, so only multi-character tokens are ranked.
+    long_tokens=(token for token in jieba.cut(text) if len(token)>1)
+    frequencies=Counter(long_tokens)
+    return frequencies.most_common(count)
+
 def stats_text(text,count):
     '''
     合并中英词频的结果

diff --git a/exercises/1901100139/d13/main.py b/exercises/1901100139/d13/main.py
@@ -0,0 +1,60 @@
+from os import path
+import logging
+import requests
+import matplotlib.pyplot as plt
+import pyquery
+from wxpy import *
+from mymodule import stats_word
+
+cwd=path.abspath(path.dirname(__file__))  # absolute directory of this script; stats.png is written here
+# NOTE(review): SimHei is presumably selected so the Chinese chart labels render - confirm the font is installed.
+plt.rcParams['font.sans-serif']='simHei'
+# Log at DEBUG level; each record carries the file name, line number and message.
+logging.basicConfig(format='file:%(filename)s|line:%(lineno)d|message:%(message)s',level=logging.DEBUG)
+
+def get_article(url):
+    """Fetch `url` and return the text inside the page's `#js_content` element."""
+    response=requests.get(url,timeout=10)  # requests has no default timeout; bound the wait so a stalled server cannot hang the caller
+    return pyquery.PyQuery(response.text)('#js_content').text()
+
+def generate_image(data,image_path):
+    """Save `data`, a sequence of (label, count) pairs, as a horizontal bar chart at `image_path`."""
+    fig,ax=plt.subplots()
+    try:
+        positions=range(len(data))
+        ax.barh(positions,[pair[1] for pair in data])
+        ax.set_yticks(positions)
+        ax.set_yticklabels([pair[0] for pair in data])
+        ax.invert_yaxis()  # draw the first entry of `data` at the top
+        ax.set(ylabel='关键字',xlabel='词频',title='词频统计')
+        fig.savefig(image_path,bbox_inches='tight')
+    finally:
+        plt.close(fig)  # pyplot keeps figures alive until closed; release it even if drawing or saving fails
+
+def main():
+    """Start a wxpy bot that answers sharing messages from friends with a word-frequency chart."""
+    bot=Bot()
+
+    @bot.register(bot.friends(),SHARING)
+    def handler(msg):
+        # Fetch the shared article, rank its top 20 words, reply with the chart; failures are only logged.
+        try:
+            logging.info('sharing url=%s',msg.url)
+            ranking=stats_word.stats_text_cn(get_article(msg.url),20)
+            chart_path=path.join(cwd,'stats.png')
+            generate_image(ranking,chart_path)
+            msg.reply_image(chart_path)
+        except Exception as error:
+            logging.exception(error)
+    embed()
+
+def test():
+    """Manual check without the bot: chart the top 20 words of one fixed article into stats.png."""
+    sample_url='https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA'
+    ranking=stats_word.stats_text_cn(get_article(sample_url),20)
+    generate_image(ranking,path.join(cwd,'stats.png'))
+
+if __name__=='__main__':
+    test()  # script entry runs the one-off chart check; main() (the bot) is not invoked here
+
+
diff --git a/exercises/1901100139/d13/mymodule/stats_word.py b/exercises/1901100139/d13/mymodule/stats_word.py
@@ -0,0 +1,25 @@
+from collections import Counter
+import jieba
+def stats_text_en(text,count):
+    """Return the `count` most common ASCII words of `text` as (word, frequency) pairs."""
+    # One translate() pass deletes the same punctuation characters that would
+    # otherwise be removed one replace() call at a time.
+    strip_table=str.maketrans('','',',.*-!')
+    cleaned=(chunk.translate(strip_table) for chunk in text.split())
+    # Keep a token only if something is left after cleaning and it is pure
+    # ASCII; non-ASCII chunks are therefore excluded from the English count.
+    kept=[token for token in cleaned if token and token.isascii()]
+    return Counter(kept).most_common(count)
+def stats_text_cn(text,count):
+    """Return the `count` most frequent jieba tokens of `text` longer than one character."""
+    # Tokens come from jieba.cut; anything of length 1 is discarded before
+    # counting, so only multi-character tokens are ranked.
+    long_tokens=(token for token in jieba.cut(text) if len(token)>1)
+    frequencies=Counter(long_tokens)
+    return frequencies.most_common(count)
+
+def stats_text(text,count):
+    '''Return the English ranking followed by the Chinese ranking, each limited to `count` entries.'''
+    english_ranking=stats_text_en(text,count)
+    chinese_ranking=stats_text_cn(text,count)
+    return english_ranking + chinese_ranking