-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTestBrower.py
More file actions
75 lines (67 loc) · 2.76 KB
/
TestBrower.py
File metadata and controls
75 lines (67 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
import urllib
import urllib.request
from idlelib import browser
from pyquery import PyQuery as pq
from selenium import webdriver
import time
import re
# Pattern isolating each news item in the serialized page markup.
_NEWS_ITEM_RULE = r'<div class="news-item-container">(.*?)<div data-v-00b2e9bc=""/>'

# (primary label, fallback label, stock name) patterns used for the items
# that are present when the page is first loaded.
_INITIAL_RULES = (
    r'<img src="/img/icon-lihao.png" data-v-6c26747a=""/>(.*?)<!----></span>',
    r'<img src="/img/icon-likong.png" data-v-6c26747a=""/>(.*?)</span>',
    r'<span class="stock-group-item-name" data-v-f97d9694="">(.*?)</span>',
)

# The same three patterns for items that appear after clicking the footer;
# they match the attributes in the opposite order.
_LOADED_RULES = (
    r'<img data-v-6c26747a="" src="/img/icon-lihao.png"/>(.*?)<!----></span>',
    r'<img data-v-6c26747a="" src="/img/icon-likong.png"/>(.*?)</span>',
    r'<span data-v-f97d9694="" class="stock-group-item-name">(.*?)</span>',
)


def _parse_news_item(fragment, rules):
    """Return ``{stock_name: label}`` for one news-item markup fragment.

    ``rules`` is a ``(primary, fallback, name)`` tuple of regex patterns.
    The label is the first match of the primary pattern (the text after the
    ``icon-lihao.png`` image) or, when that is absent, of the fallback
    pattern (``icon-likong.png``) -- presumably bullish / bearish markers.
    An empty dict is returned when the fragment has no label or no names.
    """
    primary_rule, fallback_rule, name_rule = rules
    labels = re.findall(primary_rule, fragment, re.S)
    if not labels:
        labels = re.findall(fallback_rule, fragment, re.S)
    names = re.findall(name_rule, fragment, re.S)
    if not labels or not names:
        return {}
    # Every stock named in the item receives the item's first label.
    return {name: labels[0] for name in names}


def _news_items(driver):
    """Return the news-item fragments found in the driver's current page."""
    markup = str(pq(driver.page_source))
    return markup, re.findall(_NEWS_ITEM_RULE, markup, re.S)


def _scrape(driver, url, num):
    """Load ``url`` and collect stock labels until at least ``num`` are found."""
    # Local import so the file's import block does not need to change.
    from selenium.webdriver.common.by import By

    driver.get(url)
    markup, items = _news_items(driver)
    print(markup)  # dump of the normalized page markup, kept from the original

    dic = {}
    for fragment in items:
        dic.update(_parse_news_item(fragment, _INITIAL_RULES))
        print("正在爬取第", len(dic) + 1, "个请稍等.....")
    processed = len(items)  # items before this index are already handled

    while len(dic) < num:
        # Clicking the footer makes the page append further news items.
        # find_element(By..., ...) replaces find_element_by_class_name,
        # which no longer exists in Selenium 4.3+.
        driver.find_element(By.CLASS_NAME, "home-news-footer").click()
        time.sleep(1)  # give the page time to render the new items
        _, items = _news_items(driver)
        for fragment in items[processed:]:
            dic.update(_parse_news_item(fragment, _LOADED_RULES))
        processed = len(items)
        if len(dic) < num:
            print("正在爬取第", len(dic) + 1, "个请稍等.....")
    return dic


def openurl(url, num):
    """Scrape stock names and their labels from a news page in Chrome.

    Opens ``url`` in a new Chrome WebDriver session, extracts every
    news item from the page markup and maps each stock name mentioned in an
    item to that item's label. While fewer than ``num`` stocks have been
    collected, the element with class ``home-news-footer`` is clicked to load
    more items and only the newly added items are parsed.

    Args:
        url: Address of the page to open.
        num: Minimum number of distinct stock names to collect; ``0`` means
            only the initially loaded items are parsed.

    Returns:
        A dict mapping stock name (str) to label text (str).

    Raises:
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started, the page cannot be loaded, or the footer element is
            missing. The browser is closed before the exception propagates.
    """
    driver = webdriver.Chrome()
    try:
        dic = _scrape(driver, url, num)
    finally:
        # Always release the browser, even when scraping fails part-way.
        driver.quit()
    print("爬取完毕!!")
    return dic
# Alternative target URL (disabled).
#url='https://www.xuangubao.cn/'
url = 'https://www.biki.com/zh_CN/trade/BTC_USDT'

# Stored as `result` rather than `dict` so the builtin dict type is not
# shadowed for the rest of the module.
result = openurl(url, 0)
print(result)

# Disabled: append every stock name, tab-separated, to a local text file
# and echo each name/label pair.
#with open("F:\\text.txt", "a") as f:
#    for key, values in result.items():
#        f.write(key + "\t")
#        print(key, values)