最近在编写一些XPath网页规则,发现网上的XPath测试工具很多,但都是基于静态页面的,暂时还没有发现基于动态页面的XPath测试工具。为了后续测试方便,于是就自己动手写了一个。
from tkinter import *
import tkinter as tk
import re
from lxml import etree
from selenium import webdriver
# Start the headless browser used to render dynamic pages. Chrome is tried
# first; Firefox is the fallback when Chrome cannot be started.
try:
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # `options=` is the supported keyword. The legacy `chrome_options=` keyword
    # was deprecated and is rejected by newer Selenium releases, which made
    # this branch fail and silently fall through to Firefox.
    driver = webdriver.Chrome(options=chrome_options)
except Exception:
    # Deliberately broad: any failure to start Chrome triggers the Firefox
    # fallback. Unlike a bare `except:`, KeyboardInterrupt and SystemExit
    # still propagate.
    fire_profile = webdriver.FirefoxOptions()
    fire_profile.add_argument('--disable-gpu')
    fire_profile.add_argument('-headless')  # run without a visible window
    driver = webdriver.Firefox(options=fire_profile)

# Rendered source of the most recently loaded page ("" when nothing is loaded).
source = ""
# URL of the most recently loaded page; used to skip redundant reloads.
old_url = ""
def result_to_string(result):
    """
    Convert an XPath evaluation result into displayable text and markup.

    :param result: value produced by an ``xpath()`` call: a list whose items
        are strings and/or element-like objects, a single string, or a
        number/boolean.
    :return: ``(text, html)`` tuple of strings. For a list, items are joined
        with newlines; ``text`` uses each element's ``.text`` and ``html``
        uses its serialized markup. Any other unsupported value yields
        ``("", "")``.
    """
    if isinstance(result, list):
        texts = []
        markups = []
        for one in result:
            if isinstance(one, str):
                texts.append(one)
                markups.append(one)
            else:
                # `.text` is None when the element has no leading text;
                # fall back to "" so concatenation cannot raise TypeError.
                texts.append(one.text or "")
                markups.append(
                    etree.tostring(one, pretty_print=True, encoding="utf-8").decode("utf-8")
                )
        out_str = "\n".join(texts)
        out_html = "\n".join(markups)
        # NOTE(review): every space is removed, including spaces inside text
        # and between a tag name and its attributes. Kept to preserve the
        # existing output format - confirm whether this is intended.
        return out_str.replace(" ", "").strip("\n"), out_html.replace(" ", "").strip("\n")
    if isinstance(result, str):
        compact = result.replace(" ", "")
        return compact, compact
    if isinstance(result, (bool, int, float)):
        # Results of expressions such as count(...) or boolean(...).
        rendered = str(result)
        return rendered, rendered
    return "", ""
def _show_load_error(message):
    """Show *message* in the source pane and forget any previously loaded page."""
    global old_url, source
    source_text.delete("1.0", 'end')
    source_text.insert("insert", message)
    # Clearing the cached state lets the same URL be retried later and keeps
    # XPath tests from running against a page that is no longer displayed.
    old_url = ""
    source = ""


def jiazai():
    """
    Load the URL typed in the address entry and display its rendered source.

    The URL must start with ``http://`` or ``https://`` and contain a dot;
    otherwise an error message is shown instead. A page is fetched only when
    the URL differs from the last successfully loaded one or the source pane
    is empty. On any failure the cached URL and source are cleared.

    :return: None
    """
    global old_url, source
    url = url_text.get()
    if not url.startswith(("http://", "https://")):
        _show_load_error("请检查是否添加http或https前缀!!!")
        return None
    if "." not in url:
        _show_load_error("请输入正确格式的网址!!!")
        return None
    # Same page is already displayed: nothing to do.
    if old_url == url and source_text.get("1.0", "1.1") != "":
        return None
    source_text.delete("1.0", 'end')
    try:
        driver.get(url)
        page = driver.page_source
    except Exception:
        # Broad on purpose: any browser/driver failure is reported to the
        # user instead of surfacing as a traceback from the Tk callback.
        _show_load_error("此网址无法解析,请输入其他的网址!!!")
        return None
    # The browser returned an empty document, so nothing real was loaded.
    if page == '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body></body></html>':
        _show_load_error("请输入真实的网址!!!")
        return None
    # Commit the cached state only after a successful load.
    source = page
    old_url = url
    source_text.insert("insert", page)
    return None
def ceshi():
    """
    Evaluate the XPath rule from the rule entry against the loaded page.

    Does nothing when no page source is loaded or the rule is empty.
    Otherwise both result panes are cleared and filled with the matched text
    and the matched markup. If the rule cannot be evaluated, the error is
    shown in the text result pane instead.

    :return: None
    """
    rule = xpath_text.get()
    if source == "" or rule == "":
        return None
    html = etree.HTML(source)
    result_text.delete("1.0", "end")
    result_source_text.delete("1.0", "end")
    try:
        result = html.xpath(rule)
    except etree.XPathError as err:
        # Invalid expressions are expected in a rule-testing tool; report
        # them to the user rather than raising inside the Tk callback.
        result_text.insert("insert", "XPath规则有误: " + str(err))
        return None
    out_string, out_html = result_to_string(result)
    result_text.insert("insert", out_string)
    result_source_text.insert("insert", out_html)
    return None
# Main window.
window = tk.Tk()
window.title('动态网页XPATH验证工具 Designed by Mr.Li')
window.geometry('750x560')

# One frame per row, stacked from top to bottom.
url_frame = tk.Frame(window)
url_frame.pack()
xpath_frame = tk.Frame(window)
xpath_frame.pack()
result_frame = tk.Frame(window)
result_frame.pack()
html_frame = tk.Frame(window)
html_frame.pack()
source_frame = tk.Frame(window)
source_frame.pack()

# URL row: label, entry and the "load page" button.
url_label = tk.Label(url_frame, text='请输入网址:')
url_label.pack(side=tk.LEFT)
url_text = tk.Entry(url_frame, width=45)
url_text.pack(side=tk.LEFT)
button1var = tk.StringVar()
button1var.set("加载网页")
url_button = tk.Button(url_frame, textvariable=button1var, width=10, command=jiazai)
url_button.pack(side=tk.RIGHT)

# XPath rule row: label, entry and the "test" button.
xpath_label = tk.Label(xpath_frame, text='请输入规则:')
xpath_label.pack(side=tk.LEFT)
xpath_text = tk.Entry(xpath_frame, width=45)
xpath_text.pack(side=tk.LEFT)
xpath_button = tk.Button(xpath_frame, text="测试", width=10, command=ceshi)
xpath_button.pack(side=tk.RIGHT)

# Result rows: matched text, then matched markup.
result_label = tk.Label(result_frame, text='结果文字:')
result_label.pack(side=tk.LEFT)
result_text = tk.Text(result_frame, height=9, width=80)
result_text.pack(side=tk.LEFT)
result_source_label = tk.Label(html_frame, text='结果源码:')
result_source_label.pack(side=tk.LEFT)
result_source_text = tk.Text(html_frame, height=9, width=80)
result_source_text.pack(side=tk.LEFT)

# Page source row.
source_label = tk.Label(source_frame, text='网页源码:')
source_label.pack(side=tk.LEFT)
source_text = tk.Text(source_frame, height=17, width=80)
source_text.pack(side=tk.LEFT)

try:
    window.mainloop()
finally:
    # Shut down the headless browser when the window closes; otherwise its
    # process keeps running after the tool exits.
    driver.quit()
下图为启动界面:
下图为运行结果: