import requests
import re
from urllib import parse
import os
class BaiduImageSpider(object):
    """Download the preview images Baidu image search lists for a keyword.

    The spider fetches one result page from ``image.baidu.com``, pulls every
    ``"hoverURL"`` value out of the page source with a regular expression and
    saves each referenced image as ``<save_root>/<word>/<word>_<n>.jpg``.

    Args:
        save_root: Directory under which one sub-directory per search word is
            created. Defaults to the path the script has always used.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.
    """

    # Regex applied to the raw page source; each capture is one image URL.
    _HOVER_URL_PATTERN = re.compile('"hoverURL":"(.*?)"', re.S)
    # Characters Windows rejects in file names; '/' and '\\' would also let a
    # search word point outside of ``save_root``.
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, save_root='C:/Users/Administrator/Desktop/image',
                 timeout=10):
        self.url = 'https://image.baidu.com/search/flip?tn=baiduimage&word={}'
        self.headers = {'User-Agent': 'Mozilla/4.0'}
        self.save_root = save_root
        self.timeout = timeout

    @classmethod
    def _extract_image_links(cls, html):
        """Return the non-empty ``hoverURL`` values found in *html*, in order."""
        # An empty hoverURL would make requests.get('') raise MissingSchema,
        # so such entries are dropped here instead of aborting the download.
        return [link for link in cls._HOVER_URL_PATTERN.findall(html) if link]

    @classmethod
    def _safe_name(cls, word):
        """Turn *word* into a string usable as a single path component."""
        cleaned = cls._UNSAFE_NAME_CHARS.sub('_', word).strip(' .')
        # A word made only of dots/spaces (e.g. '..') must not become a
        # relative path segment; fall back to a placeholder name.
        return cleaned or '_'

    def get_image(self, url, word):
        """Fetch the search page at *url* and download every image it lists.

        Args:
            url: Fully formatted search URL (keyword already percent-encoded).
            word: The original keyword; used to name the target directory and
                the saved files.

        Raises:
            requests.RequestException: If the search page itself cannot be
                fetched or answers with an HTTP error status. Failures of
                individual image downloads are reported and skipped.
        """
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        res.raise_for_status()
        res.encoding = "utf-8"
        img_link_list = self._extract_image_links(res.text)
        print(img_link_list)

        safe_word = self._safe_name(word)
        directory = os.path.join(self.save_root, safe_word)
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

        for i, img_link in enumerate(img_link_list, start=1):
            filename = os.path.join(
                directory, '{}_{}.jpg'.format(safe_word, i))
            try:
                self.save_image(img_link, filename)
            except requests.RequestException as err:
                # One broken link should not stop the remaining downloads.
                print(filename, '下载失败', err)

    def save_image(self, img_link, filename):
        """Download *img_link* and write its bytes to *filename*.

        Raises:
            requests.RequestException: On connection problems, timeouts or an
                HTTP error status; nothing is written in that case.
            OSError: If *filename* cannot be opened or written.
        """
        response = requests.get(
            url=img_link, headers=self.headers, timeout=self.timeout)
        # Refuse to store an HTTP error page under a .jpg name.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(filename, '下载成功')

    def run(self):
        """Prompt for a keyword on stdin and download its search results."""
        word = input("您想要谁的照片?")
        # The keyword goes into the query string, so it must be URL-quoted.
        word_parse = parse.quote(word)
        url = self.url.format(word_parse)
        self.get_image(url, word)
# Script entry point: only start the interactive spider when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    BaiduImageSpider().run()