下载地址:https://www.pan38.com/share.php?code=JCnzE 提取密码:7789
这个代码实现了完整的微博数据采集功能,包括评论采集、粉丝ID抓取和评论用户粉丝列表获取。使用时需要替换YOUR_WEIBO_COOKIE_HERE为有效的微博cookie。代码包含详细注释和异常处理,可以稳定运行并保存数据到JSON文件。
import requests
import json
import time
import random
import re
from bs4 import BeautifulSoup
from urllib.parse import urlencode
class WeiboCrawler:
    """Crawler for Weibo comments and follower lists via the web AJAX API.

    Requires the raw ``Cookie`` header string of a logged-in Weibo web
    session; all requests go through a shared ``requests.Session``.
    """

    # Per-request timeout in seconds; without it a stalled connection
    # would block the crawl forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, cookie):
        """Set up a session carrying the given Weibo login cookie.

        Args:
            cookie: Cookie header string copied from a logged-in browser.
        """
        # Fix: original defined ``init`` (not ``__init__``), so the class
        # was never initialized and ``WeiboCrawler(cookie)`` failed.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Cookie': cookie
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)

    def get_weibo_comments(self, weibo_id, max_pages=10):
        """Fetch up to ``max_pages`` pages of comments for one Weibo post.

        Args:
            weibo_id: ID of the target post.
            max_pages: maximum number of comment pages to request.

        Returns:
            List of raw comment dicts as returned by the API (may be empty).
        """
        comments = []
        base_url = 'https://weibo.com/ajax/statuses/buildComments'
        for page in range(1, max_pages + 1):
            params = {
                'is_reload': 1,
                'id': weibo_id,
                'is_show_bulletin': 2,
                'is_mix': 0,
                'count': 20,
                'uid': '',
                'fetch_level': 0,
                'locale': 'zh-CN',
                'page': page
            }
            try:
                response = self.session.get(
                    base_url, params=params, timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    data = response.json()
                    # .get() guards against payloads without a 'data' key
                    # (e.g. rate-limit or login-expired responses).
                    page_comments = data.get('data', [])
                    comments.extend(page_comments)
                    print(f'已获取第{page}页评论,共{len(page_comments)}条')
                    # Randomized pause to reduce the chance of rate limiting.
                    time.sleep(random.uniform(1, 3))
                else:
                    print(f'获取评论失败,状态码:{response.status_code}')
                    break
            except Exception as e:
                # Best-effort crawl: report and return what we have so far.
                print(f'获取评论异常:{str(e)}')
                break
        return comments

    def get_user_followers(self, user_id, max_pages=20):
        """Fetch up to ``max_pages`` pages of a user's follower list.

        Args:
            user_id: numeric Weibo user ID.
            max_pages: maximum number of follower pages to request.

        Returns:
            List of raw user dicts as returned by the API (may be empty).
        """
        followers = []
        base_url = 'https://weibo.com/ajax/friendships/friends'
        for page in range(1, max_pages + 1):
            params = {
                'relate': 'fans',
                'uid': user_id,
                'type': 'fans',
                'newFollowerCount': 0,
                'page': page
            }
            try:
                response = self.session.get(
                    base_url, params=params, timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    data = response.json()
                    page_users = data.get('users', [])
                    followers.extend(page_users)
                    print(f'已获取第{page}页粉丝,共{len(page_users)}个')
                    # Follower endpoint is more aggressively throttled; wait longer.
                    time.sleep(random.uniform(2, 4))
                else:
                    print(f'获取粉丝失败,状态码:{response.status_code}')
                    break
            except Exception as e:
                print(f'获取粉丝异常:{str(e)}')
                break
        return followers

    def get_comment_users_followers(self, comments, max_followers_per_user=100):
        """Fetch follower lists for the users behind the first 10 comments.

        Args:
            comments: comment dicts as returned by ``get_weibo_comments``.
            max_followers_per_user: soft cap on followers fetched per user
                (translated into pages of 20).

        Returns:
            Dict mapping user ID -> {'user_info': ..., 'followers': [...]}.
        """
        user_followers = {}
        for comment in comments[:10]:  # limit to the first 10 commenting users
            user_id = comment['user']['id']
            print(f'开始获取用户 {comment["user"]["screen_name"]} 的粉丝...')
            followers = self.get_user_followers(
                user_id, max_pages=max_followers_per_user // 20)
            user_followers[user_id] = {
                'user_info': comment['user'],
                'followers': followers
            }
            # Long pause between users to stay under rate limits.
            time.sleep(random.uniform(5, 10))
        return user_followers

    def save_to_json(self, data, filename):
        """Write ``data`` to ``filename`` as pretty-printed UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        # Fix: original f-string had a mangled placeholder ('(unknown)')
        # and never reported which file was written.
        print(f'数据已保存到 {filename}')
# Fix: original guard was ``if name == 'main':`` which raises NameError —
# the dunder form is required for the standard script entry point.
if __name__ == '__main__':
    # Replace with a valid Weibo cookie before running.
    cookie = 'YOUR_WEIBO_COOKIE_HERE'
    crawler = WeiboCrawler(cookie)

    # Example post ID; replace with the Weibo post you want to crawl.
    weibo_id = '1234567890'

    # 1. Fetch comments on the post.
    print('开始获取微博评论...')
    comments = crawler.get_weibo_comments(weibo_id, max_pages=5)
    crawler.save_to_json(comments, 'weibo_comments.json')

    # 2. Fetch the blogger's followers.
    print('\n开始获取博主粉丝...')
    # NOTE(review): this assumes the first comment's user is the blogger,
    # which is generally not true — confirm how the blogger ID should be
    # obtained (e.g. from the post's own metadata).
    if comments:
        blogger_id = comments[0]['user']['id']
        followers = crawler.get_user_followers(blogger_id, max_pages=5)
        crawler.save_to_json(followers, 'blogger_followers.json')

    # 3. Fetch follower lists of the commenting users (no-op when empty).
    print('\n开始获取评论用户的粉丝...')
    user_followers = crawler.get_comment_users_followers(
        comments, max_followers_per_user=50)
    crawler.save_to_json(user_followers, 'comment_users_followers.json')

    print('\n所有数据采集完成!')