直接上代码:
import os
import re

# HTTP headers sent with every request.  The original snippet referenced a
# name `headers` that was never defined, which raised NameError on the first
# request.  NOTE(review): the User-Agent value below is a generic browser
# string chosen as a placeholder -- adjust it if the site needs something else.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
}

# Directory the downloaded pictures are written to (same location as before).
SAVE_DIR = "D://糗事百科image/"

# Seconds to wait for a response before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Layout of one picture entry on https://www.qiushibaike.com/imgrank/ :
#   <div class="thumb">
#     <a href="/article/124066439" target="_blank">
#       <img src="//pic.qiushibaike.com/system/pictures/12406/124066439/medium/NKSSOW6NS7WM1L6J.jpg"
#            alt="糗事#124066439" class="illustration" width="100%" height="auto">
#     </a>
#   </div>
# re.S is required because the <div> spans several lines and "." must also
# match the newlines between the tags.
IMG_PATTERN = re.compile(
    r'<div class="thumb">.*?<img src="(.*?)" alt.*?</div>', re.S
)


def creat_fivenum(num, n=5):
    """Return *num* as a decimal string left-padded with zeros to width *n*.

    Used to build fixed-width file names such as ``00001``.

    :param num: the number to format (a non-negative integer is expected).
    :param n: minimum width of the result; values that already have at
        least *n* characters are returned unpadded.
    :return: the zero-padded string.
    """
    # The previous hand-rolled loop ignored ``n`` and fell off the end
    # (returning None) for numbers with ten or more digits.
    return str(num).zfill(n)


def downQiushiImg(endpage=13):
    """Download every picture from the first *endpage* ranking pages.

    Each picture is saved into ``SAVE_DIR`` under a sequential, zero-padded
    name (``00001.png``, ``00002.png``, ...), and a summary line with the
    number of saved pictures is printed at the end.

    :param endpage: how many pages to crawl, starting from page 1.
    :return: None
    """
    # Imported here rather than at module level so that the pure helper
    # above stays importable when the third-party package is not installed.
    import requests

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(SAVE_DIR, exist_ok=True)

    saved = 0  # number of pictures written so far
    for page in range(1, endpage + 1):
        page_url = f"https://www.qiushibaike.com/imgrank/page/{page}/"
        page_res = requests.get(
            page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
        )
        for src in IMG_PATTERN.findall(page_res.text):
            # The page uses protocol-relative URLs ("//pic..."), so a scheme
            # has to be prepended before the picture can be requested.
            img_url = "http:" + src
            print(img_url)
            img_res = requests.get(
                img_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            # NOTE(review): files keep the original ".png" suffix even though
            # the sample URL above ends in ".jpg" -- confirm whether the
            # suffix should follow the source URL instead.
            target = f"{SAVE_DIR}{creat_fivenum(saved + 1)}.png"
            with open(target, "wb") as f:
                f.write(img_res.content)
            saved += 1
    print(f"爬取完毕,一共{saved}个照片")


if __name__ == "__main__":
    downQiushiImg()
View Code
注意:再次申明,如果正则匹配不到内容,多半是因为没有指定 re.S(它让 "." 也能匹配换行符,这样才能跨行匹配 HTML)。