#读取文本文档,生成数组,将数组元素逐一传给获取数据的函数,然后写入CSV文件。可以先运行win(),再运行mian()
import csv
import time

import openpyxl
import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
# HTTP headers carrying a desktop Chrome User-Agent string.
# NOTE(review): not referenced by any code visible in this file.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Start Chrome through the local chromedriver binary and load the search page.
# This runs at import time, so a browser window opens as soon as the module
# is loaded.
driver = webdriver.Chrome(r'D:\Python\chromedriver.exe')
driver.get("https://datasearch.chinanpo.gov.cn/gsxt/newList")

# URL reported by the browser right after the page load above.
url = driver.current_url
def win():
    """Switch the driver's focus to the first window handle it reports."""
    first_handle = driver.window_handles[0]
    driver.switch_to.window(first_handle)
def text(path='test.txt'):
    """Read a text file and return its non-blank lines as a list.

    Args:
        path: File to read. Defaults to ``test.txt``, resolved against the
            current working directory.

    Returns:
        The file's lines in order, without line terminators. Lines that are
        empty or contain only whitespace are skipped, so a trailing newline
        or an empty file no longer produces an empty entry.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Opened with open()'s default text encoding, exactly as before; the
    # context manager guarantees the handle is closed.
    with open(path) as f:
        content = f.read()
    # Split on '\n' only (text mode already normalises line endings) and
    # drop blank entries so no empty query is handed to the caller.
    return [line for line in content.split('\n') if line.strip()]
def writetocsv(datalist, path='cpi20210614.csv'):
    """Append rows to a CSV file.

    Args:
        datalist: Iterable of rows; each row is an iterable of field values.
        path: CSV file to append to. Defaults to ``cpi20210614.csv`` in the
            current working directory.

    Returns:
        Always ``True`` once every row has been written.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # newline='' lets the csv module control line endings, as its
    # documentation requires. The file uses open()'s default text encoding,
    # as before, so rows appended to an existing file stay consistent.
    with open(path, 'a', newline='') as f:
        print('saving.....')
        # csv.writer quotes fields that contain commas, quotes or newlines;
        # joining with "," by hand would have produced a corrupt row.
        csv.writer(f).writerows(datalist)
    print('saving OK')
    return True
def getdata(name):
    """Search the loaded page for ``name`` and return the result title text.

    Types ``name`` into the search input, clicks the search button, then
    reads the text of the ``search_title`` element and prints it.

    Args:
        name: Text typed into the search input.

    Returns:
        A one-row list ``[[name, title_text]]``, ready for ``writetocsv``.

    Raises:
        Any exception Selenium raises when an element cannot be located or
        interacted with propagates to the caller.
    """
    # find_element(By.XPATH, ...) is available in both Selenium 3 and 4;
    # the find_element_by_xpath shortcut was removed in Selenium 4.3.
    search_box = driver.find_element(
        By.XPATH, '//input[@class="search_input ant-input"]')
    search_box.clear()
    search_box.send_keys(name)
    time.sleep(1)
    driver.find_element(By.XPATH, '//span[@class="search_button"]').click()
    # Fixed pause before reading the result.
    # NOTE(review): presumably gives the page time to render the search
    # result -- confirm; an explicit wait would be more reliable.
    time.sleep(2)
    res = driver.find_element(By.XPATH, '//div[@class="search_title"]').text
    print(res)
    return [[name, res]]
def mian():
    """Fetch data for every name returned by ``text()`` and append it to the CSV.

    Names are processed in order. A name is removed from the queue only
    after ``writetocsv`` returns a truthy value. If fetching or writing
    raises, the page is refreshed and the same name is tried again.

    NOTE(review): there is no retry limit, so a name that fails every time
    keeps the loop running until it is interrupted.
    """
    pending = text()
    while pending:
        time.sleep(2)
        name = pending[0]
        try:
            saved = writetocsv(getdata(name))
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt / SystemExit can still stop the retry loop.
            print(name, end='')
            print('稍等再试')
            time.sleep(2)
            driver.refresh()
            time.sleep(2)
        else:
            if saved:
                print(name + "完成")
                pending.pop(0)
# Reads the text file into a list, passes each entry to the data-fetching function, then writes the results to the CSV file. Run win() first, then mian().