首先,我们通过一个主题词进行视频搜索,然后把找到的视频链接保存下来。链接里包含视频的 id,通过这个 id 调用 B 站的 API 接口,就可以获取视频的信息,其中包括视频的播放量。查询的视频数量请尽量少一点,给 B 站造成服务压力可不好。
# -*- coding: utf-8 -*-
"""Collect the ids of popular Bilibili videos that match a search keyword.

The script scrapes the Bilibili search result page for a keyword, extracts
the numeric video id ("aid") from every result link, asks the public
statistics endpoint for the view count of each video, and appends the ids
whose view count reaches a threshold to a text file.
"""
import json
import os
import re
import time
import urllib.request
import zlib
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

# Sample embed snippet; the numeric id appears as the ``aid`` query parameter:
# <iframe src="//player.bilibili.com/player.html?aid=66494272&page=1" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="true"> </iframe>

# Headers sent with the statistics API request.
headers = {
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
}

# User agent sent with the search page request.
SEARCH_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE"
)

# The id is passed through ``params`` only; the base URL carries no query.
VIDEO_STAT_URL = "http://api.bilibili.com/archive_stat/stat"

# Seconds to wait for either HTTP endpoint before giving up.
REQUEST_TIMEOUT = 10

# Matches the numeric id in links such as ".../video/av66494272?from=search".
_AID_PATTERN = re.compile(r"/av(\d+)")


def require_video(video_id):
    """Return the view count of a video, or -1 when it cannot be determined.

    Args:
        video_id: The numeric video id ("aid"), as a string or an int.

    Returns:
        The ``view`` value reported by the statistics endpoint, or -1 when
        the request fails, the response is not valid JSON, the response
        message is not ``"0"``, or the view count is missing or ``"--"``.
    """
    try:
        response = requests.get(
            url=VIDEO_STAT_URL,
            params={"aid": video_id},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        video_info = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: report "unknown" through the
        # same sentinel the function uses for every other failure.
        return -1

    print(video_info)
    if not isinstance(video_info, dict) or video_info.get("message") != "0":
        return -1

    data = video_info.get("data")
    if not isinstance(data, dict):
        return -1

    view_count = data.get("view", "--")
    if view_count == "--":
        return -1
    return view_count


def _extract_aid(link):
    """Return the numeric id found after ``/av`` in *link*, or None."""
    match = _AID_PATTERN.search(link)
    return match.group(1) if match else None


def _fetch_search_links(keyword, page_num):
    """Download one search result page and return its ``a.title`` tags."""
    # Quote the keyword on its own so characters such as "&" or "=" inside
    # it cannot be mistaken for query-string separators.
    search_url = (
        "https://search.bilibili.com/all?keyword="
        + quote(keyword, safe='', encoding="utf-8")
        + "&page=" + str(page_num)
    )
    print(search_url)

    req = urllib.request.Request(url=search_url)
    req.add_header("User-Agent", SEARCH_USER_AGENT)
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as data:
        search_html = data.read().decode("utf-8", 'ignore')

    search_bsObj = BeautifulSoup(search_html, 'html.parser')
    return search_bsObj.find_all("a", {"class": "title"})


def get_aid(Keyword, max_pages=1, min_views=100000,
            output_path="hot_video.txt"):
    """Search for *Keyword* and record the ids of sufficiently viewed videos.

    Each id whose view count is at least *min_views* is appended, one per
    line, to *output_path*. The function sleeps one second before every
    page request and before every video lookup to limit the request rate.

    Args:
        Keyword: The search term.
        max_pages: Number of result pages to scan, starting at page 1.
            Keep this small to avoid putting load on the site.
        min_views: Minimum view count a video needs to be recorded.
        output_path: File the matching ids are appended to.
    """
    print('searching, please wait......')
    # The context manager closes the file even if a request raises.
    with open(output_path, "a", encoding="utf-8") as out_file:
        for page_num in range(1, max_pages + 1):
            time.sleep(1)
            search_linkList = _fetch_search_links(Keyword, page_num)
            print('found %s in this page' % len(search_linkList))

            for i, item in enumerate(search_linkList):
                time.sleep(1)
                print('%s:%s' % (i, item.get('title', '')))

                href = item.get('href')
                if not href:
                    continue
                # Result links are expected to be protocol-relative
                # ("//host/path"); only those need a scheme prepended.
                search_link = "http:" + href if href.startswith("//") else href
                search_link = quote(search_link, safe='/:?=&', encoding="utf-8")
                print(search_link)

                avid = _extract_aid(search_link)
                if avid is None:
                    # Links without an "av<digits>" id cannot be looked up
                    # through the aid-based statistics endpoint.
                    continue
                print(avid)

                video_played_times = require_video(avid)
                try:
                    views = int(video_played_times)
                except (TypeError, ValueError):
                    continue
                if views >= min_views:
                    out_file.write(avid + "\n")


def main():
    """Run the search for the default keyword."""
    keyword = '舞蹈'
    get_aid(keyword)


if __name__ == '__main__':
    main()