# Parse a single channel and collect all of its results.
import json

import requests

feed_url = 'https://podcasts.google.com/feed/aHR0cHM6Ly93d3cueGltYWxheWEuY29tL2FsYnVtLzQwNDc3NDU0LnhtbA?sa=X&ved=0CNwBEI7OAigIahcKEwjgyKrX5Or-AhUAAAAAHQAAAAAQAQ'

# Text that introduces an embedded data payload in the feed page's HTML.
_CALLBACK_MARKER = "AF_initDataCallback("

# (output key, index inside one raw entry) pairs, in output order.
# NOTE(review): the indices come from inspecting the page payload; the
# meaning of each slot is inferred from the key names only -- confirm
# against a live page before relying on them.
_ENTRY_FIELDS = (
    ("channel", 1),
    ("id1", 11),
    ("id2", 12),
    ("title", 8),
    ("link", 13),
    ("desc", 14),
    ("image", 15),
)


def get_item_from_feed(url: str) -> list:
    """Fetch a feed page and extract every entry embedded in it.

    The page is scanned line by line for ``AF_initDataCallback(`` calls.
    For each such line, the ``data:`` payload of the first call is parsed
    as JSON and every entry found under its element ``[1]`` is converted
    to a dict with the keys ``channel``, ``id1``, ``id2``, ``title``,
    ``link``, ``desc`` and ``image``.

    Side effects: prints each raw entry and each resulting dict, dumps the
    most recently parsed raw payload to ``1.json``, and writes all results
    to ``out_item.list`` as one JSON object per line.

    Args:
        url: Address of the feed page to download.

    Returns:
        The list of entry dicts, in page order.

    Raises:
        AssertionError: A callback line lacks the expected ``);``,
            ``data:`` or ``, sideChannel:`` delimiters.
        json.JSONDecodeError: The extracted payload is not valid JSON.
        IndexError: A payload or entry is shorter than expected.
    """
    html = requests.get(url).text
    out_item_list = []

    for line in html.split('\n'):
        if _CALLBACK_MARKER not in line:
            continue

        # Only the first callback on a line is examined.
        call_text = line.split(_CALLBACK_MARKER)[1]
        assert ');' in call_text, "callback is not terminated by ');'"
        # NOTE(review): this cuts at the first ');', so a payload whose
        # text itself contains ');' would be truncated here.
        block = call_text.split(');')[0]
        assert 'data:' in block, "callback has no 'data:' field"
        assert ', sideChannel:' in block, "callback has no 'sideChannel:' field"

        # Take everything between the first 'data:' and the last
        # ', sideChannel:' so that either token appearing inside the JSON
        # text (e.g. in a description) does not truncate the payload.
        list_raw = block.partition('data:')[2].rpartition(', sideChannel:')[0]

        # Debug dump of the raw payload; overwritten for every callback.
        with open('1.json', 'w', encoding='utf-8') as dump_file:
            dump_file.write(list_raw)

        payload = json.loads(list_raw)
        for group in payload[1]:
            for sub_item in group:
                print(sub_item)
                out_item = {key: sub_item[index] for key, index in _ENTRY_FIELDS}
                print(out_item)
                out_item_list.append(out_item)

    # ensure_ascii=False emits raw non-ASCII characters, so the file must
    # be opened with an explicit encoding that can represent them.
    with open("out_item.list", 'w', encoding='utf-8') as list_file:
        list_file.writelines(
            json.dumps(entry, ensure_ascii=False) + '\n'
            for entry in out_item_list
        )

    return out_item_list


def get_one_audio(url: str, file_name: str) -> None:
    """Download ``url`` and save the response body to ``file_name``.

    Prints the response object before writing the file.

    Args:
        url: Address of the resource to download.
        file_name: Path of the file to write the raw bytes to.
    """
    res = requests.get(url)
    print(res)
    with open(file_name, 'wb') as audio_file:
        audio_file.write(res.content)


def main():
    """Extract all entries from the default feed URL."""
    get_item_from_feed(feed_url)


if __name__ == "__main__":
    main()