from selenium import webdriver
import time
import pymongo
class JdSpider(object):
    """Crawl JD.com search results for a keyword using headless Chrome.

    The spider opens the JD home page, submits a search, then walks the
    result pages one by one, printing one dict per product and counting
    the products seen in ``self.i``.
    """

    # Locator strategy names understood by WebDriver.find_element(s).
    # They are the same strings selenium exposes as By.XPATH and
    # By.CLASS_NAME; spelling them out avoids relying on the
    # find_element_by_* helpers that Selenium 4.3 removed.
    _BY_XPATH = 'xpath'
    _BY_CLASS_NAME = 'class name'

    # (item key, XPath relative to one result <li>) for every scraped field.
    _FIELD_XPATHS = (
        ('name', './/div[@class="p-name"]/a/em'),
        ('price', './/div[@class="p-price"]'),
        ('count', './/div[@class="p-commit"]/strong'),
        ('shop', './/div[@class="p-shopnum"]'),
    )

    def __init__(self, keyword='python书籍', scroll_pause=2, page_pause=1):
        """Start a headless Chrome session.

        Args:
            keyword: Text typed into the JD search box.
            scroll_pause: Seconds to wait after scrolling a result page to
                the bottom before reading the product list.
            page_pause: Seconds to wait after clicking the "next page"
                button.
        """
        self.url = 'http://www.jd.com/'
        self.keyword = keyword
        self.scroll_pause = scroll_pause
        self.page_pause = page_pause
        self.options = webdriver.ChromeOptions()
        self.options.add_argument('--headless')
        self.browser = webdriver.Chrome(options=self.options)
        # Running total of products scraped across all pages.
        self.i = 0

    def get_html(self):
        """Open the home page and submit the search for ``self.keyword``."""
        self.browser.get(self.url)
        search_box = self.browser.find_element(self._BY_XPATH, '//*[@id="key"]')
        search_box.send_keys(self.keyword)
        self.browser.find_element(
            self._BY_XPATH, "//*[@class='form']/button"
        ).click()

    def _first_text(self, node, xpath):
        """Return the stripped text of the first match, or '' if none."""
        # find_elements returns an empty list instead of raising, so a
        # result entry without this field does not abort the crawl.
        matches = node.find_elements(self._BY_XPATH, xpath)
        return matches[0].text.strip() if matches else ''

    def get_data(self):
        """Scrape every product on the current result page.

        Each product is printed as a dict with the keys ``name``,
        ``price``, ``count`` and ``shop`` (a missing field is ``''``), and
        ``self.i`` is incremented once per product.

        Returns:
            list[dict]: The products found on this page, in page order.
        """
        # Scroll to the bottom first -- presumably so that lazily loaded
        # results are present in the DOM; the pause gives them time to load.
        self.browser.execute_script(
            'window.scrollTo(0,document.body.scrollHeight)'
        )
        time.sleep(self.scroll_pause)

        rows = self.browser.find_elements(
            self._BY_XPATH, '//*[@id="J_goodsList"]/ul/li'
        )
        items = []
        for row in rows:
            item = {
                key: self._first_text(row, xpath)
                for key, xpath in self._FIELD_XPATHS
            }
            print(item)
            items.append(item)
            self.i += 1
        return items

    def _goto_next_page(self):
        """Click the "next page" button; return False on the last page."""
        # The last page is recognised by a disabled next button in the markup.
        if 'pn-next disabled' in self.browser.page_source:
            return False
        buttons = self.browser.find_elements(self._BY_CLASS_NAME, 'pn-next')
        if not buttons:
            # No pager at all: treat the current page as the only one.
            return False
        buttons[0].click()
        time.sleep(self.page_pause)
        return True

    def run(self):
        """Run the whole crawl, then always shut the browser down."""
        try:
            self.get_html()
            while True:
                self.get_data()
                if not self._goto_next_page():
                    break
            print('数量', self.i)
        finally:
            # Without quit() the headless Chrome/chromedriver processes
            # would outlive the script, even when the crawl fails midway.
            self.browser.quit()
if __name__ == '__main__':
    # Script entry point: build the spider and crawl until the last page.
    jd_spider = JdSpider()
    jd_spider.run()