-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathafterparse.py
More file actions
33 lines (30 loc) · 797 Bytes
/
afterparse.py
File metadata and controls
33 lines (30 loc) · 797 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import re
import os
import time
from bs4 import BeautifulSoup
from urllib.request import urlopen
import sys
# Load the relative page paths to scrape, one per line of the list file.
# The context manager closes the file even if reading fails part-way.
with open(r'content_page_list_tichong_tichu.txt', 'r') as f:
    # Drop the newline from each entry and skip blank lines: an empty path
    # would otherwise become a request for the bare site root further down.
    a = [path for path in (line.strip('\n') for line in f) if path]
# Fetch every listed page, pull the text of the first <p> inside the
# "article_con" div, and append it to langji520.txt. A page that cannot be
# fetched or parsed is reported and skipped so that one bad entry does not
# end the whole run.
total = len(a)
for position, x1 in enumerate(a):
    try:
        try:
            # Parse inside the `with` so the HTTP response is closed as soon
            # as the document has been read.
            with urlopen("http://www.langji520.com" + x1) as html:
                ctObj = BeautifulSoup(html, "html.parser")
        except OSError as reason:
            # urllib's URLError/HTTPError and socket timeouts are all
            # OSError subclasses; report the failed page and move on.
            print("OSError:" + str(reason) + str(x1))
            continue

        # NOTE(review): kept from the original; this is set after parsing and
        # presumably does not affect the extracted text -- confirm and drop.
        ctObj.encoding = 'utf-8'
        content = ctObj.find(name='div', attrs={'class': "article_con"}).find("p").get_text()
        print(content)

        # Open per page (as the original did) but close deterministically, so
        # each article is on disk before the next request starts.
        # NOTE(review): the file uses the platform default encoding, as
        # before; consider an explicit encoding once existing output is checked.
        with open("langji520.txt", "a") as fo:
            fo.write(content + '\r\n')

        # Countdown includes the current page. Using the loop position avoids
        # rescanning the list and stays correct when a path appears twice.
        print("write Successful!" + str(x1) + "Still remaining:" + str(total - position))
    except AttributeError as reason:
        # find() returned None: the page has no "article_con" div or no <p>.
        print("AttributeError:" + str(reason) + str(x1))
    except UnicodeEncodeError as reason:
        # The path or the extracted text could not be encoded (URL, console
        # or output file); report it and continue with the next page.
        print(str(reason) + str(x1))