-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpic_parser
More file actions
31 lines (27 loc) · 1.19 KB
/
pic_parser
File metadata and controls
31 lines (27 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import requests
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
import re
# 目標頁面
res = requests.get('https://www.ptt.cc/bbs/Beauty/index2109.html',cookies={'over18':'1'})
#print(res.text)
soup = BeautifulSoup(res.text, 'lxml')
#print(soup)
# 使用迴圈進入到目標頁面中的每個主題頁面
articles=soup.select('.r-ent a')
for article in articles:
url = 'https://www.ptt.cc' + article['href']
#print(article,url)
res = requests.get(url,cookies={'over18':'1'})
soup = BeautifulSoup(res.text, 'lxml')
#print(soup)
# 判斷網址中有沒有圖片,如果有就開始下載
#pp=len(soup.findAll('a', {'href': re.compile('http:\/\/i\.imgur\.com\/.*')}))
if len(soup.findAll('a', {'href': re.compile('http:\/\/i\.imgur\.com\/.*')})) > 0:
#print("1")
for index, img_url in enumerate(soup.findAll('a', {'href': re.compile('http:\/\/i\.imgur\.com\/.*')})):
try:
# 記得更改想要下載到的位置
urlretrieve(img_url['href'], 'D:\{}_{}.jpg'.format(article.text, index))
except:
print('{} {}_{}.jpg 下載失敗!'.format(img_url['href'], article.text, index))