From 53ff2137c368f4493178c6638fdc09bc1fe53e55 Mon Sep 17 00:00:00 2001 From: monkey-soft Date: Fri, 23 Oct 2015 23:57:03 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E7=AC=AC0013=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monkey/0013/main.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 monkey/0013/main.py diff --git a/monkey/0013/main.py b/monkey/0013/main.py new file mode 100644 index 00000000..852ab1c4 --- /dev/null +++ b/monkey/0013/main.py @@ -0,0 +1,17 @@ +# -*- coding:utf-8 -*- +from lxml import etree +import requests + +__author__ = 'monkey' + +# 题目要求: +# 用Pyhton写一个爬图片的程序,爬这个链接里的日本妹子图片 +# 地址:http://tieba.baidu.com/p/2166231880 + +def spider(url): + html = requests.get(url) + print(html.text) + +if __name__ == '__main__': + url = "http://tieba.baidu.com/p/2166231880" + spider(url) \ No newline at end of file From 83c2b05e1c31ec66800fc20b5fe21b4af103d200 Mon Sep 17 00:00:00 2001 From: monkey-soft Date: Sat, 24 Oct 2015 00:12:50 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E7=AC=AC0013=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monkey/0013/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/monkey/0013/main.py b/monkey/0013/main.py index 852ab1c4..f1a62f1e 100644 --- a/monkey/0013/main.py +++ b/monkey/0013/main.py @@ -10,7 +10,11 @@ def spider(url): html = requests.get(url) - print(html.text) + selector = etree.HTML(html.text) + picitems = [] + picitems = selector.xpath('//div[@id="post_content_29397251028"]') + + print(len(picitems)) if __name__ == '__main__': url = "http://tieba.baidu.com/p/2166231880" From 3b3be198f988e3f1cc0a222cb6ba653caaae122f Mon Sep 17 00:00:00 2001 From: monkey-soft Date: Wed, 11 Nov 2015 23:31:39 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E7=AC=AC0013=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monkey/0013/main.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/monkey/0013/main.py b/monkey/0013/main.py index f1a62f1e..7cf6a1f9 100644 --- a/monkey/0013/main.py +++ b/monkey/0013/main.py @@ -1,6 +1,7 @@ # -*- coding:utf-8 -*- from lxml import etree import requests +import urllib __author__ = 'monkey' @@ -8,13 +9,27 @@ # 用Pyhton写一个爬图片的程序,爬这个链接里的日本妹子图片 # 地址:http://tieba.baidu.com/p/2166231880 +# 获取url地址,对页面进行爬去 def spider(url): html = requests.get(url) selector = etree.HTML(html.text) picitems = [] - picitems = selector.xpath('//div[@id="post_content_29397251028"]') + picitems = selector.xpath('//div[@id="post_content_29397251028"]/img[@class="BDE_Image"]') + print(len(picitems)); + + i = 0 + for pic in picitems: + url = pic.xpath('@src')[0] + print(url) + dir = './%d.jpg'%i + download_Image(url, dir) + i += 1 + + + +def download_Image(url, save_path): + urllib.request.urlretrieve(url, save_path) - print(len(picitems)) if __name__ == '__main__': url = "http://tieba.baidu.com/p/2166231880" From 6f217b27de01563609b1b9300e582d79d5e0b3f9 Mon Sep 17 00:00:00 2001 From: monkey-soft Date: Mon, 16 Nov 2015 21:29:38 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E7=AC=AC0013=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monkey/0013/main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/monkey/0013/main.py b/monkey/0013/main.py index 7cf6a1f9..f210276a 100644 --- a/monkey/0013/main.py +++ b/monkey/0013/main.py @@ -13,20 +13,21 @@ def spider(url): html = requests.get(url) selector = etree.HTML(html.text) + picitems = [] - picitems = selector.xpath('//div[@id="post_content_29397251028"]/img[@class="BDE_Image"]') - print(len(picitems)); + picitems = selector.xpath('//div[@class="d_post_content j_d_post_content clearfix"]/img[@class="BDE_Image"]') + print(len(picitems)) i = 0 for pic in picitems: url = pic.xpath('@src')[0] - print(url) + #print(url) dir = './%d.jpg'%i download_Image(url, dir) i += 1 - +# 下载图片 def download_Image(url, save_path): urllib.request.urlretrieve(url, save_path)