1.场景
用 Python 做一个爬虫，测试用的案例代码如下：
# Introductory example found online, reproduced unchanged. According to the
# surrounding text, the browser window opens but the page never loads.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
# NOTE(review): `option` is created here but never passed to webdriver.Chrome below.
option=webdriver.ChromeOptions()
# NOTE(review): this path does not name chromedriver.exe (its backslashes also
# appear to have been lost); the working version later in this file passes
# "chromedriver.exe" to Service instead.
sever = Service(r"C:WindowsSystem32cmd.exechrom.exe")
# Start Chrome through the service object defined above.
drive=webdriver.Chrome(service=sever)
# Ask the browser to navigate to the Baidu home page.
drive.get('http://www.baidu.com')
这是我在网上找到的入门案例：运行后浏览器能打开，但网页打不开。其实这个案例误导了我——Service 的参数应该是 chromedriver.exe 的路径，而不是上面那样的路径。
2.解决
我们先要把环境搞正确
先下载 chromedriver（版本需要与本机安装的 Chrome 版本一致）：
http://chromedriver.storage.googleapis.com/index.html
然后把 chromedriver.exe 放到 Python 脚本所在的文件夹里，
代码中调用的是 chromedriver.exe：
# Working setup: chromedriver.exe has been placed in the script's folder.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Browser options; add switches such as '--headless' here when needed.
option = webdriver.ChromeOptions()
# Service takes the path of the chromedriver executable. This is a relative
# path, so it is presumably resolved against the current working directory.
sever = Service(r"chromedriver.exe")
# Pass the options object explicitly; previously it was created but never used.
drive = webdriver.Chrome(service=sever, options=option)
# Navigate the browser to the Baidu home page.
drive.get('http://www.baidu.com')
这样就可以了
3.案例
另外贴一个自动登录百度、爬取百度指数的案例：
import random
from time import sleep
from urllib.parse import quote

import execjs
import requests
import urllib3
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Decryption helper
def decrypt(key, data, js_string):
    """Evaluate the JavaScript ``decrypt`` function on *key* and *data*.

    Args:
        key: First argument forwarded to the JavaScript ``decrypt`` function.
        data: Second argument forwarded to the JavaScript ``decrypt`` function.
        js_string: JavaScript source that defines a function named ``decrypt``.

    Returns:
        Whatever the JavaScript ``decrypt`` call returns.
    """
    # Compile the script and invoke its `decrypt` entry point in one go.
    return execjs.compile(js_string).call('decrypt', key, data)
def getcookie(username='13467657887', passwd='Hy13467657887', login_wait=60):
    """Log in to Baidu through Chrome and return the browser cookies as a header string.

    The function opens the Baidu home page, fills in the login dialog, searches
    for '百度指数', clicks a result link and then collects the cookies of the
    most recently opened window.

    NOTE(review): the default credentials are the literals that were hard-coded
    in the original function body. Credentials should not live in source code;
    pass them in explicitly (for example from an environment variable).

    Args:
        username: Account name typed into the login form.
        passwd: Password typed into the login form.
        login_wait: Seconds to pause after submitting the login form before
            continuing. The original code slept for 1,000,000 seconds here,
            which made everything after the login unreachable in practice;
            presumably the pause exists to allow manual verification.

    Returns:
        A string of the form ``"name=value; name=value"`` built from the
        cookies reported by the driver.
    """
    option = webdriver.ChromeOptions()
    # Optional switches kept from the original (disabled):
    # option.add_argument('--start-maximized')
    # option.add_argument('--headless')
    # option.add_argument('--disable-gpu')
    # option.add_argument('--user-data-dir=d:/test/chrome')
    # option.add_argument('--remote-debugging-port=9222')
    sever = Service(r"chromedriver.exe")
    # Pass the options object so that any switch enabled above takes effect.
    drive = webdriver.Chrome(service=sever, options=option)
    try:
        drive.get('http://www.baidu.com')
        sleep(3)

        # Open the login dialog. find_element(By..., ...) replaces the
        # find_element(s)_by_* helpers, which were removed in Selenium 4.3.
        drive.find_element(By.CSS_SELECTOR, '#u1>a.lb').click()
        sleep(random.randint(0, 2))

        # Kept from the original (disabled); presumably switches the dialog to
        # username/password login - confirm before enabling.
        # drive.find_element(By.CSS_SELECTOR, 'p.tang-pass-footerBarULogin').click()

        drive.find_element(By.ID, "TANGRAM__PSP_11__userName").send_keys(username)
        sleep(random.randint(0, 2))
        drive.find_element(By.ID, "TANGRAM__PSP_11__password").send_keys(passwd)
        sleep(2)
        drive.find_element(By.ID, 'TANGRAM__PSP_11__submit').click()

        # Pause before continuing (see `login_wait` in the docstring).
        sleep(login_wait)

        # Search for the Baidu Index entry and click the highlighted result.
        drive.find_element(By.ID, 'kw').send_keys('百度指数')
        drive.find_element(By.ID, 'su').click()
        sleep(1)
        # Added: grabs focus of the newly opened window when the
        # lcbin@163.com account is used.
        drive.switch_to.window(drive.window_handles[-1])
        drive.find_element(By.XPATH, "//div//h3[@class='t']//a//em").click()
        sleep(1)
        # Added: grabs focus of the newly opened window when the
        # lcbin@163.com account is used.
        drive.switch_to.window(drive.window_handles[-1])

        # Serialise the cookies into the format used by a Cookie header.
        cookie = '; '.join(
            f'{item["name"]}={item["value"]}' for item in drive.get_cookies()
        )
    finally:
        # Always shut the browser and the chromedriver process down.
        drive.quit()
    print(cookie)
    return cookie
# Suppress urllib3's InsecureRequestWarning (the sessions created further down
# in this file set `verify = False`).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# JavaScript source evaluated through execjs. Its `decrypt(t, e)` function maps
# each character in the first half of `t` to the character at the same offset
# in the second half, then replaces every character of `e` using that map.
js_string = '''
function decrypt(t, e) {
for (var n = t.split(""), i = e.split(""), a = {}, r = [], o = 0; o < n.length / 2; o++)
a[n[o]] = n[n.length / 2 + o];
for (var s = 0; s < e.length; s++)
r.push(a[i[s]]);
return r.join("")
}
'''
# Headers assigned to the requests session created by BDIndex.get_session.
headers = {
# NOTE(review): "cookie" is a literal placeholder, not the value returned by
# getcookie(); presumably it must be replaced with a real cookie string - confirm.
"Cookie": "cookie",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/75.0.3770.142 Safari/537.36"
}
# URL template for the feed index request; `{}` is filled with the search keyword.
data_url = 'http://index.baidu.com/api/FeedSearchApi/getFeedIndex?word={}&area=0&days=30'
# URL template for the follow-up request; `{}` is filled with the `uniqid`
# taken from the first response.
uniq_id_url ='http://index.baidu.com/Interface/ptbk?uniqid={}'
class BDIndex(object):
    """Client that fetches and decrypts Baidu feed-index data over a requests session."""

    def __init__(self):
        # One session per instance, created by get_session().
        self.session = self.get_session()

    @staticmethod
    def get_session():
        """Create a requests session carrying the module-level ``headers``.

        Returns:
            A ``requests`` session with certificate verification disabled.
        """
        session = requests.session()
        # Copy the header values in instead of rebinding `session.headers` to
        # the module-level dict: later changes to one no longer leak into the
        # other, and the session keeps its own header container.
        session.headers.update(headers)
        session.verify = False
        return session

    @staticmethod
    def decrypt(key, data):
        """Run the JavaScript ``decrypt`` function from ``js_string`` on *key* and *data*."""
        js_handler = execjs.compile(js_string)
        return js_handler.call('decrypt', key, data)

    def get_bd_index(self, key_word):
        """Fetch the index series for *key_word* and return it decrypted.

        Args:
            key_word: Search keyword inserted into ``data_url``.

        Returns:
            The result of decrypting ``data['index'][0]['data']`` with the key
            obtained from the ``uniq_id_url`` endpoint.

        Raises:
            ValueError: If the first response carries no ``data`` object.
        """
        # Percent-encode the keyword so characters such as '&' or '#' cannot
        # alter the query string.
        url = data_url.format(quote(str(key_word), safe=''))
        response = self.session.get(url).json()

        payload = response.get("data")
        if not isinstance(payload, dict):
            # Fail with a readable message instead of an AttributeError on `.get`.
            raise ValueError(
                "unexpected response for {!r}: {!r}".format(key_word, response)
            )

        # The second endpoint returns the key used to decrypt the series.
        uniq_id = self.session.get(
            uniq_id_url.format(payload.get("uniqid"))
        ).json().get("data")
        data_dict = payload['index'][0]['data']
        return self.decrypt(uniq_id, data_dict)
if __name__ == '__main__':
    # Log in through the browser and capture the cookie string.
    cookie = getcookie()
    # Replace the placeholder value so that sessions created afterwards send
    # the cookie that was just obtained; previously it was discarded.
    headers["Cookie"] = cookie
    # Example usage, left disabled as in the original:
    # bd = BDIndex()
    # data = bd.get_bd_index("肺炎")
    # print(data)