# 下载地址:https://www.pan38.com/share.php?code=JCnzE 提取密码:7789
# 本文给出一个完整的QQ群成员采集工具实现方案,包含登录认证、群成员获取、数据存储和导出等功能。网络请求部分采用异步IO实现,以提高采集效率。如需进一步扩展功能,可以参考文末"功能扩展建议"部分进行开发。
import requests
import json
import hashlib
import time
import random
import sqlite3
from bs4 import BeautifulSoup
import xlsxwriter
import asyncio
import aiohttp
from fake_useragent import UserAgent
class QQGroupCrawler:
    """Fetch the member lists of the QQ groups visible to one account.

    Typical flow: ``await login()``, ``await get_group_list()``, then for
    each group ``await get_group_members(gc)`` followed by
    ``save_to_sqlite`` and/or ``save_to_excel``.
    """

    def __init__(self, qq, password):
        """Store the credentials and initialise the HTTP-related state.

        Args:
            qq: Account number used as the login name.
            password: Password for that account.
        """
        self.qq = qq
        self.password = password
        self.session = requests.Session()
        self.ua = UserAgent()
        # Populated by login(): the response cookies and the derived tokens.
        self.cookies = None
        self.gtk = None
        self.bkn = None
        self.headers = {
            'User-Agent': self.ua.random,
            'Referer': 'https://qun.qq.com/',
            'Origin': 'https://qun.qq.com',
        }

    def _get_hash(self, uin, ptwebqq):
        """Return the hex MD5 digest of ``uin + ptwebqq + "password"``."""
        hash_str = str(uin) + str(ptwebqq) + "password"
        return hashlib.md5(hash_str.encode()).hexdigest()

    async def login(self):
        """Send the login request and capture cookies and tokens on success.

        Returns:
            True when the server answers HTTP 200 and the body contains the
            success marker; False otherwise.
        """
        login_url = "https://ssl.ptlogin2.qq.com/login"
        # NOTE(review): the password travels in plaintext as a GET query
        # parameter here; confirm this is what the endpoint expects.
        params = {
            'u': self.qq,
            'p': self.password,
            'verifycode': '',
            'webqq_type': 10,
            'remember_uin': 1,
            'login2qq': 1,
            'aid': '715030901',
            'u1': 'https://qun.qq.com/',
            'h': 1,
            'ptredirect': 0,
            'ptlang': 2052,
            'daid': 73,
            'from_ui': 1,
            'pttype': 1,
            'dumy': '',
            'fp': 'loginerroralert',
            'action': '0-0-' + str(int(time.time())),
            'mibao_css': '',
            't': 1,
            'g': 1,
            'js_type': 0,
            'js_ver': 22010414,
            'login_sig': '',
            'pt_randsalt': 0,
        }
        async with aiohttp.ClientSession() as session:
            async with session.get(login_url, params=params) as resp:
                if resp.status != 200:
                    return False
                cookies = resp.cookies
                content = await resp.text()
        if '登录成功' not in content:
            return False
        self.cookies = cookies
        self._parse_login_response(content)
        return True

    def _parse_login_response(self, content):
        """Extract ``ptwebqq`` from the login page and derive gtk/bkn from it.

        Args:
            content: HTML body of the login response.
        """
        if self.cookies is None:
            self.cookies = {}
        soup = BeautifulSoup(content, 'html.parser')
        for script in soup.find_all('script'):
            # partition() never raises, unlike split(...)[1], when a script
            # mentions "ptwebqq" without the "ptwebqq=" assignment.
            _, marker, tail = script.text.partition('ptwebqq=')
            if marker:
                self.cookies['ptwebqq'] = tail.split(';')[0]
        if 'ptwebqq' in self.cookies:
            ptwebqq = self.cookies['ptwebqq']
            # A SimpleCookie hands back a Morsel; the hash helpers need the
            # plain string, so unwrap it when present.
            ptwebqq = getattr(ptwebqq, 'value', ptwebqq)
            self.gtk = self._calc_gtk(ptwebqq)
            self.bkn = self._calc_bkn(ptwebqq)

    def _calc_gtk(self, skey):
        """Return the gtk token: a running hash of *skey* masked to 31 bits.

        The hash starts at 5381 and, per character, adds
        ``(hash << 5) + ord(char)``.
        """
        hash_val = 5381
        for s in skey:
            hash_val += (hash_val << 5) + ord(s)
        return hash_val & 0x7fffffff

    def _calc_bkn(self, skey):
        """Return the bkn token: a base-131 rolling hash kept to 32 bits."""
        hash_val = 0
        for s in skey:
            hash_val = (hash_val * 131 + ord(s)) & 0xffffffff
        return hash_val

    async def get_group_list(self):
        """Return the created and joined groups, created groups first.

        Returns:
            A list of group entries as delivered by the server, or an empty
            list when there are no cookies or the status is not 200.
        """
        if not self.cookies:
            return []
        url = "https://qun.qq.com/cgi-bin/qun_mgr/get_group_list"
        params = {
            'bkn': self.bkn,
            't': int(time.time() * 1000),
        }
        async with aiohttp.ClientSession(cookies=self.cookies) as session:
            async with session.get(url, params=params) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    # "or []" also covers keys that are present but null.
                    created = data.get('create') or []
                    joined = data.get('join') or []
                    return created + joined
        return []

    async def get_group_members(self, group_id):
        """Return the member entries of one group.

        Args:
            group_id: Group identifier sent as the ``gc`` parameter.

        Returns:
            The ``mems`` list of the response, or an empty list when there
            are no cookies or the status is not 200.
        """
        if not self.cookies:
            return []
        url = "https://qun.qq.com/cgi-bin/qun_mgr/search_group_members"
        params = {
            'gc': group_id,
            'st': 0,
            'end': 5000,  # request members 0..5000 in a single call
            'sort': 0,
            'bkn': self.bkn,
            't': int(time.time() * 1000),
        }
        async with aiohttp.ClientSession(cookies=self.cookies) as session:
            async with session.get(url, params=params) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    return data.get('mems', [])
        return []

    def save_to_sqlite(self, group_id, members):
        """Upsert *members* into ``qq_group_members.db``.

        Rows are keyed on ``(group_id, qq_number)``; an existing row with
        the same key is replaced.

        Args:
            group_id: Identifier stored in the ``group_id`` column.
            members: Iterable of member dicts; missing keys are stored as
                NULL.
        """
        rows = [
            (
                group_id,
                member.get('uin'),
                member.get('nick'),
                member.get('gender'),
                member.get('age'),
                member.get('join_time'),
                member.get('last_speak_time'),
                member.get('card'),
                member.get('level'),
                member.get('role'),
            )
            for member in members
        ]
        conn = sqlite3.connect('qq_group_members.db')
        try:
            # "with conn" commits on success and rolls back on error; the
            # finally clause guarantees the handle is released either way.
            with conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS group_members (
                        group_id TEXT,
                        qq_number TEXT,
                        nickname TEXT,
                        gender TEXT,
                        age INTEGER,
                        join_time INTEGER,
                        last_speak_time INTEGER,
                        card TEXT,
                        level INTEGER,
                        role INTEGER,
                        PRIMARY KEY (group_id, qq_number)
                    )
                ''')
                conn.executemany(
                    'INSERT OR REPLACE INTO group_members '
                    'VALUES (?,?,?,?,?,?,?,?,?,?)',
                    rows,
                )
        finally:
            conn.close()

    @staticmethod
    def _format_timestamp(timestamp):
        """Format a Unix timestamp as local ``YYYY-MM-DD HH:MM:SS``.

        Returns an empty string for a missing or zero timestamp, so absent
        data is not rendered as the epoch or as the current time.
        """
        if not timestamp:
            return ''
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))

    def save_to_excel(self, group_id, members):
        """Write *members* to ``qq_group_<group_id>_members.xlsx``.

        Args:
            group_id: Identifier used in the output file name.
            members: Iterable of member dicts; missing values become empty
                cells.
        """
        filename = f'qq_group_{group_id}_members.xlsx'
        headers = ['QQ号', '昵称', '群名片', '性别', '年龄', '加群时间',
                   '最后发言时间', '等级', '角色']
        # The context manager closes (and thereby writes) the workbook even
        # if a row raises part-way through.
        with xlsxwriter.Workbook(filename) as workbook:
            worksheet = workbook.add_worksheet()
            for col, header in enumerate(headers):
                worksheet.write(0, col, header)
            for row, member in enumerate(members, 1):
                gender = member.get('gender')
                if gender is None:
                    # Unknown gender stays blank instead of defaulting to '女'.
                    gender_label = ''
                else:
                    gender_label = '男' if gender == 1 else '女'
                worksheet.write(row, 0, member.get('uin', ''))
                worksheet.write(row, 1, member.get('nick', ''))
                worksheet.write(row, 2, member.get('card', ''))
                worksheet.write(row, 3, gender_label)
                worksheet.write(row, 4, member.get('age', ''))
                worksheet.write(
                    row, 5, self._format_timestamp(member.get('join_time')))
                worksheet.write(
                    row, 6,
                    self._format_timestamp(member.get('last_speak_time')))
                worksheet.write(row, 7, member.get('level', ''))
                worksheet.write(
                    row, 8, self._get_role_name(member.get('role', 0)))

    def _get_role_name(self, role):
        """Map a numeric role code to its display name.

        Unrecognised codes map to '未知角色'.
        """
        # NOTE(review): mapping kept as found; verify the codes against the
        # live API response.
        roles = {
            0: '普通成员',
            1: '创建者',
            2: '管理员',
            3: '临时管理员',
        }
        return roles.get(role, '未知角色')
async def main():
    """Interactive entry point: log in, then export every group's members.

    Prompts for the account number and password, and for each group
    returned by the crawler stores the members in SQLite and in an Excel
    file, printing progress along the way.
    """
    # Function-local import so the password prompt does not echo the typed
    # characters to the terminal.
    import getpass

    qq = input("请输入QQ号: ")
    password = getpass.getpass("请输入QQ密码: ")
    crawler = QQGroupCrawler(qq, password)

    if not await crawler.login():
        print("登录失败!")
        return

    print("登录成功!")
    groups = await crawler.get_group_list()
    print(f"共找到{len(groups)}个QQ群")
    for group in groups:
        print(f"正在采集群 {group['gn']}({group['gc']}) 的成员...")
        members = await crawler.get_group_members(group['gc'])
        print(f"共获取到{len(members)}个成员")
        # Persist to the database, then export the same rows to Excel.
        crawler.save_to_sqlite(group['gc'], members)
        crawler.save_to_excel(group['gc'], members)
        print(f"群 {group['gn']} 成员数据已保存")
# Run the interactive flow only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())
# 三、功能扩展建议
# 1. 自动分页采集:处理大型QQ群(超过5000人)的分页采集逻辑
# 2. 增量更新:记录上次采集时间,只采集新增或变更的成员信息
# 3. 数据分析:添加成员活跃度分析、性别比例统计等功能
# 4. 可视化展示:使用PyQt/Tkinter开发图形界面
# 5. 异常处理:完善各种网络异常和验证码处理机制
# 四、注意事项
# 1. 本工具仅供技术研究使用,请勿用于非法用途
# 2. 频繁采集可能导致QQ账号异常或被限制登录
# 3. 建议使用小号进行测试,避免主号风险
# 4. QQ协议可能随时变更,需要定期更新代码
# 5. 实际使用时可能需要处理验证码等安全机制