A站
import os
import re
import zipfile  # no longer used by this script; kept so nothing else relying on it breaks

import requests

# Browser-like User-Agent sent with every request made by the AcFun downloader.
ACFUN_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}

# Prefix prepended to every playlist entry to build the segment URL.
# NOTE(review): the host is hard-coded; resolving entries against the playlist
# URL would be more general -- confirm playlists are always served from here.
ACFUN_SEGMENT_BASE = 'https://ali-safety-video.acfun.cn/mediacloud/acfun/acfun_video/hls/'


def _acfun_get(url):
    """GET *url* with the shared headers and return the response.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never written to disk as if it were video data.
    """
    response = requests.get(url=url, headers=ACFUN_HEADERS)
    response.raise_for_status()
    return response


def _playlist_entries(m3u8_text):
    """Return the segment entries of an M3U8 playlist, in playlist order.

    Every non-blank line that does not start with ``#`` is an entry.  Tag
    lines are skipped whatever their value is, so multi-digit durations or
    tags this script does not know about cannot leak into the segment list.
    """
    stripped_lines = (line.strip() for line in m3u8_text.splitlines())
    return [line for line in stripped_lines if line and not line.startswith('#')]


def _safe_file_name(title):
    """Turn a page title into a name usable as one path component."""
    # Replace path separators and the characters Windows forbids in names.
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
    return cleaned or 'acfun_video'


def download_acfun_video(url):
    """Download the video on the AcFun page *url* into ``<title>/<title>.mp4``.

    The page is scraped for the playlist URL (the text following
    ``"backupUrl``) and for the title.  Every playlist segment is downloaded
    into a folder named after the title, then the segments are appended
    byte-for-byte, in playlist order, into one output file and the
    per-segment files are removed.

    Raises:
        IndexError: if the playlist URL or the title cannot be found in the
            page.
        requests.HTTPError: if any request returns an error status.
    """
    page = _acfun_get(url).text
    # Take the first match and strip the JSON escaping around the URL.
    m3u8_url = re.findall(r'"backupUrl(.*?)"]', page)[0].replace('\\":[\\"', '').replace('\\', '')
    title = re.findall(r'<title >(.*?)- AcFun弹幕视频网 - 认真你就输啦 \(\?\ω\?\)ノ- \( ゜- ゜\)つロ</title>', page)[0]

    # os.path.join keeps the paths valid on every platform; the original
    # hard-coded a backslash separator.
    name = _safe_file_name(title)
    folder = name
    os.makedirs(folder, exist_ok=True)

    entries = _playlist_entries(_acfun_get(m3u8_url).text)

    segment_paths = []
    for index, entry in enumerate(entries):
        # A zero-padded playlist index gives unique, ordered file names
        # without depending on how many dots the segment URL contains.
        ts_name = f'{index:05d}'
        ts_path = os.path.join(folder, ts_name + '.ts')
        ts_content = _acfun_get(ACFUN_SEGMENT_BASE + entry).content
        with open(ts_path, mode='wb') as f:
            f.write(ts_content)
        segment_paths.append(ts_path)
        print(ts_name)
    print('视频片段下载完成')

    print('开始合并......')
    print([os.path.basename(path) for path in segment_paths])
    # Append the segments in download (playlist) order.  The original put
    # them into a ZIP archive named .mp4, in os.listdir() order, which is
    # neither a video container nor guaranteed to be in sequence.
    with open(os.path.join(folder, name + '.mp4'), mode='wb') as merged:
        for path in segment_paths:
            with open(path, mode='rb') as segment:
                merged.write(segment.read())
            os.remove(path)
    print('爬取完成')


if __name__ == '__main__':
    download_acfun_video(input('请输入视频网址:'))
B站(音,视频未合成)
import json
import re
import subprocess

import requests

# Path separators and the characters Windows forbids in file names.
_UNSAFE_FILE_CHARS = re.compile(r'[\\/:*?"<>|]')


def get_response(html_url):
    """Send a GET request to *html_url* and return the ``requests`` response.

    A bilibili ``referer`` header is sent with every request; per the original
    author's note it was added because requests came back as HTTP 403 without
    it (hotlink protection).

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error body is never parsed or saved as if it were real content.
    """
    headers = {
        'referer': 'https://www.bilibili.com/video/BV1TF411w7vv?spm_id_from=333.337.search-card.all.click&vd_source=415a9fdfbb14115b672b4063903571a0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
    }
    response = requests.get(url=html_url, headers=headers)
    response.raise_for_status()
    return response


def get_video_info(html_url):
    """Scrape a video page for its title and its audio/video stream URLs.

    The title is read from the ``<h1 title="...">`` element and the stream
    URLs from the JSON assigned to ``window.__playinfo__``; the first entry
    of the ``audio`` and ``video`` lists under ``data.dash`` is used.

    Returns:
        A list ``[title, audio_url, video_url]``.

    Raises:
        IndexError: if the title or the play-info script is not in the page.
        KeyError: if the play-info JSON lacks the expected keys.
    """
    page = get_response(html_url=html_url).text
    title = re.findall('<h1 title="(.*?)" class="video-title tit">', page)[0]
    html_data = re.findall('<script>window.__playinfo__=(.*?)</script>', page)[0]
    json_data = json.loads(html_data)
    dash = json_data['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']
    return [title, audio_url, video_url]


def save(title, audio_url, video_url):
    """Download both streams and write them to ``<title>.mp3`` / ``<title>.mp4``.

    Characters that cannot appear in a file name are replaced with ``_`` so
    that a title containing e.g. ``/`` or ``?`` does not make ``open`` fail.
    The two streams are saved separately; they are not merged here.
    """
    file_stem = _UNSAFE_FILE_CHARS.sub('_', title).strip() or 'video'
    audio_content = get_response(html_url=audio_url).content
    video_content = get_response(html_url=video_url).content
    # NOTE(review): the audio stream keeps the original '.mp3' extension; the
    # actual format of the downloaded data is not checked here.
    with open(file_stem + '.mp3', mode='wb') as f:
        f.write(audio_content)
    with open(file_stem + '.mp4', mode='wb') as f:
        f.write(video_content)
    print(title, '保存成功')


# Disabled by the original author: merging the audio and video with ffmpeg.
# NOTE(review): if this is re-enabled, pass an argument list to subprocess.run
# instead of a shell string (titles may contain shell metacharacters), and
# apply the same file-name sanitising as save() so the input files are found.
# def merge_data(vide_name):
#     print('视频开始合成', vide_name)
#     cmd = f"ffmpeg -i {vide_name}.mp4 -i {vide_name}.mp3 -c:a aac -strict experimental {vide_name}output.mp4"
#     subprocess.run(cmd, shell=True)
#     print('视频合成完毕', vide_name)


def main(bv_id):
    """Download the audio and video streams of the video with id *bv_id*."""
    url = f'https://www.bilibili.com/video/{bv_id}'
    title, audio_url, video_url = get_video_info(url)
    save(title, audio_url, video_url)
    # merge_data(title)


if __name__ == '__main__':
    keyword = input('请输入要下载的视频BV号:')
    main(keyword)