Python Selenium WebDriver 在 Windows 上启动 Chrome 后打不开网页

python | 2022-03-29 09:37:32

1.场景

用python做一个爬虫,测试案例代码

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Starter example copied from the web. This is the variant that does NOT work:
# the browser window opens but the page is never loaded.
# The options object is created here but never handed to the driver.
option=webdriver.ChromeOptions()
    
# NOTE(review): the path given to Service is not chromedriver.exe, while Service
# expects the chromedriver executable. The path separators also appear to have
# been lost when the snippet was pasted -- confirm against the original source.
sever = Service(r"C:WindowsSystem32cmd.exechrom.exe")
drive=webdriver.Chrome(service=sever)
drive.get('http://www.baidu.com')

这个入门案例是从网上找的:浏览器是打开了,但就是打不开网页。其实这个案例误导了我——Service 里应该填 chromedriver.exe 的路径,而不是别的程序的路径。

2.解决

我们先要把环境搞正确

先下载 chromedriver(版本要与本机安装的 Chrome 版本对应):

http://chromedriver.storage.googleapis.com/index.html

然后把 chromedriver.exe 放到 Python 脚本所在的文件夹里

代码里调用的是 chromedriver.exe:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Working example: drive Chrome through the chromedriver.exe found via the
# relative path below (resolved against the current working directory).
option = webdriver.ChromeOptions()

# Service must be given the chromedriver executable, not the browser itself.
sever = Service(r"chromedriver.exe")
# Pass the options object along so that any switches added to it take effect.
drive = webdriver.Chrome(service=sever, options=option)
drive.get('http://www.baidu.com')

这样就可以了

 

3.案例

另外贴一个自动登录百度 爬取百度指数的案例

import random
from time import sleep
from urllib.parse import quote

import execjs
import requests
import urllib3
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

 
def decrypt(key, data, js_string):
    """Run the JavaScript ``decrypt`` function found in *js_string*.

    Args:
        key: First argument forwarded to the JavaScript function.
        data: Second argument forwarded to the JavaScript function.
        js_string: JavaScript source that defines a ``decrypt`` function.

    Returns:
        Whatever the JavaScript ``decrypt(key, data)`` call evaluates to.
    """
    # Compile the supplied source and invoke its entry point in one go.
    return execjs.compile(js_string).call('decrypt', key, data)
 
 
def getcookie(username='13467657887', passwd='Hy13467657887', login_wait=1000000):
    """Log in to Baidu in Chrome and return the browser cookies as one string.

    The function opens ``http://www.baidu.com``, fills in the login dialog,
    searches for "百度指数", follows the first highlighted result and then
    collects every cookie of the final window.

    Args:
        username: Account name typed into the login form.
        passwd: Password typed into the login form.
            NOTE(review): the defaults are the credentials that were
            hard-coded in the original; pass your own and avoid committing
            real secrets to source control.
        login_wait: Seconds to pause after submitting the login form.
            The default keeps the original value of 1,000,000 s (about
            11.5 days), which effectively blocks -- presumably it was left
            in to allow manual verification in the browser; pass a small
            value for unattended runs.

    Returns:
        The cookies joined as ``name=value; name=value`` (the format of a
        ``Cookie`` request header). The string is also printed.
    """
    option = webdriver.ChromeOptions()
    # Optional Chrome switches, disabled as in the original; enable as needed.
    # option.add_argument('--start-maximized')
    # option.add_argument('--headless')
    # option.add_argument('--disable-gpu')
    # option.add_argument('--user-data-dir=d:/test/chrome')
    # option.add_argument('--remote-debugging-port=9222')
    service = Service(r"chromedriver.exe")
    # Hand the options to the driver; previously the object was built but unused.
    drive = webdriver.Chrome(service=service, options=option)
    try:
        drive.get('http://www.baidu.com')
        sleep(3)

        # Open the login dialog. The find_element(s)_by_* helpers no longer
        # exist in Selenium 4.3+, so the By-based locators are used throughout.
        drive.find_elements(By.CSS_SELECTOR, '#u1>a.lb')[0].click()
        sleep(random.randint(0, 2))

        # Disabled in the original; presumably switches the dialog to
        # username/password login when it opens in another mode.
        # drive.find_elements(By.CSS_SELECTOR, 'p.tang-pass-footerBarULogin')[0].click()

        # Random pauses between the form interactions.
        drive.find_element(By.ID, "TANGRAM__PSP_11__userName").send_keys(username)
        sleep(random.randint(0, 2))
        drive.find_element(By.ID, "TANGRAM__PSP_11__password").send_keys(passwd)
        sleep(2)
        drive.find_element(By.ID, 'TANGRAM__PSP_11__submit').click()
        sleep(login_wait)

        # Search for the Baidu Index site and follow the highlighted result.
        drive.find_element(By.ID, 'kw').send_keys('百度指数')
        drive.find_element(By.ID, 'su').click()
        sleep(1)
        # Added: take focus of the newly opened window (needed when logging in
        # with the lcbin@163.com account).
        drive.switch_to.window(drive.window_handles[-1])
        drive.find_element(By.XPATH, "//div//h3[@class='t']//a//em").click()
        sleep(1)
        # Added: same as above, for the window opened by the result link.
        drive.switch_to.window(drive.window_handles[-1])

        cookie = '; '.join(
            item["name"] + "=" + item["value"] for item in drive.get_cookies()
        )
    finally:
        # Always shut down the browser and the chromedriver process; the driver
        # is local to this function, so nothing else could close it later.
        drive.quit()
    print(cookie)
    return cookie
 

# Silence urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
# JavaScript source of decrypt(t, e): it maps every character in the first half
# of t to the character at the same offset in the second half of t, then
# translates e character by character through that map and joins the result.
js_string = '''
function decrypt(t, e) {
    for (var n = t.split(""), i = e.split(""), a = {}, r = [], o = 0; o < n.length / 2; o++)
        a[n[o]] = n[n.length / 2 + o];
    for (var s = 0; s < e.length; s++)
        r.push(a[i[s]]);
    return r.join("")
}
'''
# Request headers shared by the HTTP session.
# NOTE(review): the Cookie value is the literal placeholder text "cookie";
# presumably it has to be replaced with a real logged-in cookie string (for
# example the one returned by getcookie()) before the API will answer -- confirm.
headers = {
    "Cookie": "cookie",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/75.0.3770.142 Safari/537.36"
}
# URL template of the feed index API; {} is filled with the search keyword.
data_url = 'http://index.baidu.com/api/FeedSearchApi/getFeedIndex?word={}&area=0&days=30'
# URL template of the key lookup; {} is filled with the uniqid taken from the feed response.
uniq_id_url ='http://index.baidu.com/Interface/ptbk?uniqid={}'

class BDIndex:
    """Minimal client that downloads and decodes a Baidu feed-index series.

    The instance owns one ``requests`` session configured from the
    module-level ``headers``; :meth:`get_bd_index` performs the two HTTP
    requests and decodes the returned series.
    """

    def __init__(self):
        self.session = self.get_session()

    @staticmethod
    def get_session():
        """Return a ``requests`` session carrying ``headers``, TLS checks disabled."""
        session = requests.Session()
        # Copy the entries instead of assigning the dict itself: assignment
        # would alias the module-level ``headers`` (shared, mutable) and drop
        # the session's case-insensitive default headers.
        session.headers.update(headers)
        session.verify = False
        return session

    @staticmethod
    def decrypt(key, data):
        """Decode *data* with the substitution table packed into *key*.

        Pure-Python equivalent of the JavaScript ``decrypt`` routine: the
        first half of *key* lists the cipher characters and the second half
        the matching plain characters at the same offsets.

        Args:
            key: Substitution key; expected to have an even length.
            data: Encoded text.

        Returns:
            The decoded text. Characters without an entry in the table are
            dropped, and an odd-length key decodes everything to ``""`` --
            both mirror what the JavaScript routine produces.
        """
        half, remainder = divmod(len(key), 2)
        # With an odd-length key the JavaScript version indexes the array at a
        # fractional offset, so every lookup is undefined; keep that outcome.
        table = {} if remainder else dict(zip(key[:half], key[half:]))
        return "".join(table.get(char, "") for char in data)

    def get_bd_index(self, key_word):
        """Fetch and decode the feed-index series for *key_word*.

        Args:
            key_word: Search term whose index series is requested.

        Returns:
            The decoded series as a string.

        Raises:
            ValueError: If the feed response carries no usable ``data`` object
                (NOTE(review): presumably what an invalid or missing login
                cookie produces -- confirm against the live API).
        """
        # Percent-encode the keyword so characters such as '&' or '#' cannot
        # break the query string.
        feed_url = data_url.format(quote(key_word, safe=""))
        response = self.session.get(feed_url).json()

        payload = response.get("data")
        if not isinstance(payload, dict):
            raise ValueError(
                "feed index response has no usable 'data' payload: %r" % (response,)
            )

        # A second request resolves the uniqid into the substitution key.
        key_response = self.session.get(uniq_id_url.format(payload.get("uniqid")))
        uniq_id = key_response.json().get("data")

        encoded_series = payload['index'][0]['data']
        return self.decrypt(uniq_id, encoded_series)

 
if __name__ == '__main__':
    # Script entry point: run the browser login and keep the cookie string.
    cookie = getcookie()
    # Example use of BDIndex, left disabled in the original:
    #bd = BDIndex()
    #data = bd.get_bd_index("肺炎")
    #print(data)

 

 

登录后即可回复 登录 | 注册
    
关注编程学问公众号