安装
pip install selenium
官方文档:https://www.seleniumhq.org/docs/
测试页面
http://www.pythonscraping.com/pages/javascript/ajaxDemo.html
警告(弃用提示,并非报错):warnings.warn('Selenium support for PhantomJS has been deprecated, please use headless versions of Chrome or Firefox instead')
解决:安装 selenium 2.x 版本
pip install "selenium < 3"
简单示例:解析 JavaScript 动态加载的页面
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def getPage1():
    """Fetch the AJAX demo page using a fixed sleep and print its content.

    This is the "forced wait" approach: the script simply sleeps for one
    second and hopes the JavaScript has finished by then. The content element
    is printed twice: once through Selenium, once through BeautifulSoup
    parsing of the rendered page source.
    """
    url = "http://www.pythonscraping.com/pages/javascript/ajaxDemo.html"
    driver = webdriver.PhantomJS()
    try:
        driver.get(url)
        time.sleep(1)  # fixed pause to give the page's JavaScript time to run

        # Read the element directly through Selenium.
        content = driver.find_element_by_id("content")
        print(content.text)

        # Parse the rendered source (a string) with BeautifulSoup instead.
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        tag = soup.find(id="content")
        print(tag)
    finally:
        # quit() (rather than close()) also shuts down the browser process,
        # and the finally clause guarantees it runs even if a step above fails.
        driver.quit()


def getPage2():
    """Fetch the AJAX demo page using an explicit wait and print its content.

    WebDriverWait polls for up to 10 seconds until the element with id
    "loadedButton" is present, which is used as the marker that loading is
    complete. The content element is printed whether or not the wait
    succeeds; if the wait fails, its exception propagates afterwards.
    """
    url = "http://www.pythonscraping.com/pages/javascript/ajaxDemo.html"
    driver = webdriver.PhantomJS(executable_path="phantomjs")
    try:
        driver.get(url)
        try:
            # Explicit wait: block until the marker element appears.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "loadedButton"))
            )
        finally:
            # Print whatever the content element holds at this point.
            content = driver.find_element_by_id("content")
            print(content.text)
    finally:
        # Always release the browser, even when the wait or lookup raised.
        driver.quit()


getPage2()
点击百度自动提交
# -*- coding:utf-8 -*-
import time

from selenium import webdriver


def clickBaidu():
    """Open Baidu in Chrome, search for a keyword, and save a screenshot.

    Types "百度" into the search box (id "kw"), clicks the submit button
    (id "su"), and writes the screenshot to "baidu_shot.png" in the current
    working directory. The five-second pauses only slow the run down so the
    automated steps can be watched.
    """
    # Chrome is used (instead of a headless browser) so the effect is visible.
    driver = webdriver.Chrome()
    try:
        time.sleep(5)

        # The scheme must be followed by "//" for a well-formed URL.
        baidu = "http://www.baidu.com"
        driver.get(baidu)
        time.sleep(5)

        driver.find_element_by_id("kw").send_keys("百度")
        time.sleep(5)

        driver.find_element_by_id("su").click()
        time.sleep(5)

        # Take a screenshot of the result page.
        driver.get_screenshot_as_file("baidu_shot.png")
        time.sleep(5)
    finally:
        # quit() ends the browser session and the driver process; running it
        # in finally avoids leaving a stray browser behind when a step fails.
        driver.quit()


clickBaidu()
# Note: the time.sleep() calls exist only to make the automated browser
# session observable; they can be removed in real use.
鼠标动作
# Mouse actions. `driver` is the WebDriver instance and `element` is a
# WebElement that has already been located.
from selenium.webdriver.common.action_chains import ActionChains

# A plain click is available directly on the element.
element.click()

# The remaining mouse actions are ActionChains methods, not WebElement
# methods; each chain is only executed when perform() is called.
ActionChains(driver).click_and_hold(element).perform()
ActionChains(driver).release(element).perform()
ActionChains(driver).double_click(element).perform()