靓丽图库 (hexuexiao.cn)
import os

import requests
import parsel

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
}

os.makedirs('img', exist_ok=True)  # the original assumed this folder already existed

for page in range(1, 5):
    print(f'============ page {page} ============')
    url = f'https://www.hexuexiao.cn/meinv/guzhuang/list-{page}.html'
    response = requests.get(url=url, headers=headers)
    selector = parsel.Selector(response.text)
    # detail-page links inside the waterfall layout
    href_list = selector.xpath('//div[@class="waterfall_1box"]/dl/dd/a/@href').getall()
    for href in href_list:
        href_data = requests.get(url=href, headers=headers).text
        selector_2 = parsel.Selector(href_data)
        # the download button on the detail page links to the full-size image
        img_url = selector_2.xpath('//a[@class="btn btn-default btn-xs"]/@href').get()
        img_data = requests.get(url=img_url, headers=headers).content
        filename = img_url.split('/')[-1]
        with open(os.path.join('img', filename), mode='wb') as f:
            f.write(img_data)
        print(filename)
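The script above opens a fresh connection for every request and will block forever if the server stalls. A minimal hardening sketch, reusing the same URLs and XPath expressions; the shared Session, the 10-second timeout, and the 0.5-second polite delay are my own additions, not values from the original:

import os
import time

import requests
import parsel

session = requests.Session()  # reuse one connection across all requests
session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'


def fetch(url):
    # a 10-second timeout keeps a stalled server from hanging the crawl
    resp = session.get(url, timeout=10)
    resp.raise_for_status()  # surface HTTP errors instead of parsing error pages
    return resp


os.makedirs('img', exist_ok=True)
for page in range(1, 5):
    listing = parsel.Selector(fetch(f'https://www.hexuexiao.cn/meinv/guzhuang/list-{page}.html').text)
    for href in listing.xpath('//div[@class="waterfall_1box"]/dl/dd/a/@href').getall():
        detail = parsel.Selector(fetch(href).text)
        img_url = detail.xpath('//a[@class="btn btn-default btn-xs"]/@href').get()
        if not img_url:
            continue  # skip detail pages without a download button
        with open(os.path.join('img', img_url.split('/')[-1]), 'wb') as f:
            f.write(fetch(img_url).content)
        time.sleep(0.5)  # polite delay between downloads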
美桌网 (win4000.com)
import os

import requests
import parsel

headers = {
    # session cookie captured from the author's browser; replace it with your own if requests are rejected
    'Cookie': 't=f8e5c504a4e96b4ddcd519559ac24870; r=1066; XSRF-TOKEN=eyJpdiI6IkdLWWRpRVlmUzk2RVNIWVVENms0V2c9PSIsInZhbHVlIjoiVDcwbjlCNEVnUFZHdG5XREJ1VWIxZWlNN2dPTG5XSEh4UTIycTdaQ2V2aWF1UkpPU2dpeEU0Z3lQaVZJazhscWt1XC80cEt0eVJcL2doaXVlZXZBaWh3U2hGNlU2VXlSQzRrY3lcL1BjdkV3b2dMUFR2MTk1UHhcL0tNY2NORVJ3MGdZIiwibWFjIjoiOTM4YWZmNTAyZDBhY2RkMmQzOWQyMDAzZDAyNzhjMDk5ZWZhZWJjMjAzMDI3MzI3MDI2ZGZjYjQ3MDJhYTZlMCJ9; win4000_session=eyJpdiI6IklKSlVWMlFHTkkzaExpdEVGYlNweUE9PSIsInZhbHVlIjoibVpxYjhHWGhocVBYZFwvQW8rZzFzdG1Td3BuRzVLbzRjTHdQd2J4aTJHWXJcL1hpTk5YNU9GUnNTSHpVU2p3Y3UxcDE3UE1UV3BzT2hJektjM2R6K1l0NGR4RmFiY1NhZlIzR0VzakhEQzdmT3Z4UjR2ZDdjZ2RJYlhcL2xzY1J4TnMiLCJtYWMiOiI2NGUyMWEwOWIxOWM1NWJkNmI1OTJlYTJjYmRjMjI1MDdkNWRmOTc5NWMyN2NlNmM0MzRkZThmMjYxOTQ2MjI0In0%3D',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}

base_url = 'http://www.win4000.com/mobile_2340_0_0_1.html'
response = requests.get(url=base_url, headers=headers)
html_data = parsel.Selector(response.text)
# links to each album's detail page
data_list = html_data.xpath('//div[@class="Left_bar"]//ul[@class="clearfix"]/li/a/@href').extract()

os.makedirs('img', exist_ok=True)  # the original assumed this folder already existed

for alist in data_list:
    response_2 = requests.get(url=alist, headers=headers).text
    html_2 = parsel.Selector(response_2)
    # cover image of the album
    img_url = html_2.xpath('//div[@class="pic-meinv"]/a/img/@src').extract_first()
    img_data = requests.get(url=img_url, headers=headers).content
    file_name = img_url.split('/')[-1]
    with open(os.path.join('img', file_name), 'wb') as f:
        print(file_name)
        f.write(img_data)
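The script above only fetches the first listing page. A sketch of paginating it, assuming (unverified) that the trailing number in mobile_2340_0_0_1.html is a 1-based page index; the empty-result check is how the sketch detects running past the last page:

import requests
import parsel

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}

# assumption: the last URL segment is the page number
for page in range(1, 4):
    page_url = f'http://www.win4000.com/mobile_2340_0_0_{page}.html'
    html = parsel.Selector(requests.get(page_url, headers=headers, timeout=10).text)
    links = html.xpath('//div[@class="Left_bar"]//ul[@class="clearfix"]/li/a/@href').extract()
    if not links:
        break  # no albums found: we have gone past the last page
    print(f'page {page}: {len(links)} albums')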
League of Legends skin images
import os

import requests

url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}


def save(hero_name, name, img_url):
    # one folder per hero, created on first use
    filename = f'{hero_name}\\'
    if not os.path.exists(filename):
        os.mkdir(filename)
    img_content = requests.get(url=img_url).content
    with open(filename + name + '.jpg', mode='wb') as f:
        f.write(img_content)


response = requests.get(url=url, headers=headers)
heroes = response.json()['hero']
for hero in heroes:
    hero_id = hero['heroId']
    hero_url = f'https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js'
    response_1 = requests.get(url=hero_url, headers=headers)
    skins = response_1.json()['skins']
    for index in skins:
        title = index['heroTitle']
        hero_name = index['heroName'] + title
        img_name = index['name']
        # chroma variants have an empty mainImg, so fall back to chromaImg
        # (the original assigned chromaImg in the else branch but never saved it)
        img_url = index['mainImg'] or index['chromaImg']
        if img_url:
            save(hero_name, img_name, img_url)
    print(title)
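One pitfall: some skin names, such as the K/DA line, contain characters like '/' that are illegal in file names, so open() raises an error for them. A small hypothetical helper (safe_name is my own addition, not part of the original script) that strips those characters before saving:

import re


def safe_name(name):
    # replace characters that Windows forbids in file names
    return re.sub(r'[\\/:*?"<>|]', '_', name)


# usage inside the skin loop above:
#     save(safe_name(hero_name), safe_name(img_name), img_url)
print(safe_name('K/DA 阿狸'))  # -> K_DA 阿狸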
Thanks to everyone who read this article; more to come.
Parts of this article draw on other work; if anything infringes, please get in touch and it will be corrected or removed.