# 下载地址: https://www.pan38.com/dow/share.php?code=JCnzE 提取密码: 9182
# 该工具支持高德/百度/腾讯三平台商家电话采集,包含多线程处理和数据去重功能。
# 使用时需先申请各平台API密钥并配置到config.json中。
import requests
import pandas as pd
from tqdm import tqdm
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
class MapPhoneScraper:
    """Collect merchant phone numbers from AMap, Baidu Map and Tencent Map
    POI (point-of-interest) search web APIs.

    API keys are read from ``config.json`` in the working directory; when the
    file is missing or malformed, placeholder defaults are used so the object
    can still be constructed.
    """

    def __init__(self):
        # BUG FIX: method was named `init`, so Python never called it as a
        # constructor and `self.config` / `self.headers` were never set.
        self.config = self._load_config()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        }

    def _load_config(self):
        """Load the API-key configuration from config.json.

        Returns:
            dict: parsed config, or a default template with placeholder keys
            when the file is absent or not valid JSON.
        """
        try:
            with open('config.json') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            # Narrowed from a bare `except:` so unrelated errors
            # (e.g. KeyboardInterrupt) are not silently swallowed.
            return {
                "amap_key": "your_amap_key",
                "baidu_key": "your_baidu_key",
                "qq_key": "your_qq_key",
                "max_workers": 5,
                "output_dir": "output_data"
            }

    def scrape_amap(self, keyword, city, pages=10):
        """Collect POIs from the AMap place/text API.

        Args:
            keyword: search keyword (e.g. a business category).
            city: target city name.
            pages: number of result pages to fetch (25 POIs per page).

        Returns:
            list[dict]: one record per POI with source/name/address/phone/location.
        """
        base_url = "https://restapi.amap.com/v3/place/text"
        results = []
        for page in tqdm(range(1, pages + 1), desc=f"高德采集[{city}-{keyword}]"):
            params = {
                'key': self.config['amap_key'],
                'keywords': keyword,
                'city': city,
                'offset': 25,
                'page': page,
                'extensions': 'all',
                'output': 'json'
            }
            try:
                resp = requests.get(base_url, params=params, headers=self.headers, timeout=10)
                data = resp.json()
                if data.get('status') == '1':
                    # .get with default: some responses omit 'pois' entirely.
                    for poi in data.get('pois', []):
                        results.append({
                            '来源': '高德地图',
                            '名称': poi.get('name'),
                            '地址': poi.get('address'),
                            '电话': poi.get('tel') or poi.get('detail_info', {}).get('tel'),
                            '经纬度': poi.get('location')
                        })
            except Exception as e:
                print(f"高德采集异常: {e}")
            # Throttle between pages to stay under the API rate limit.
            time.sleep(0.5)
        return results

    def scrape_baidu(self, keyword, region, pages=10):
        """Collect POIs from the Baidu place/v2/search API.

        Args:
            keyword: search keyword.
            region: target region/city name.
            pages: number of result pages to fetch (20 POIs per page).

        Returns:
            list[dict]: one record per POI with source/name/address/phone/location.
        """
        base_url = "https://api.map.baidu.com/place/v2/search"
        results = []
        for page in tqdm(range(1, pages + 1), desc=f"百度采集[{region}-{keyword}]"):
            params = {
                'query': keyword,
                'region': region,
                'output': 'json',
                'ak': self.config['baidu_key'],
                'scope': 2,  # scope=2 asks for detailed info (incl. telephone)
                'page_size': 20,
                'page_num': page - 1  # Baidu pages are 0-based
            }
            try:
                resp = requests.get(base_url, params=params, headers=self.headers, timeout=10)
                data = resp.json()
                if data.get('status') == 0:
                    for poi in data.get('results', []):
                        results.append({
                            '来源': '百度地图',
                            '名称': poi.get('name'),
                            '地址': poi.get('address'),
                            '电话': poi.get('telephone') or poi.get('detail_info', {}).get('telephone'),
                            '经纬度': poi.get('location')
                        })
            except Exception as e:
                print(f"百度采集异常: {e}")
            # Throttle between pages to stay under the API rate limit.
            time.sleep(0.5)
        return results

    def scrape_qq(self, keyword, city, pages=10):
        """Collect POIs from the Tencent ws/place/v1/search API.

        Args:
            keyword: search keyword.
            city: target city name (used in the region boundary).
            pages: number of result pages to fetch (20 POIs per page).

        Returns:
            list[dict]: one record per POI with source/name/address/phone/location.
        """
        base_url = "https://apis.map.qq.com/ws/place/v1/search"
        results = []
        for page in tqdm(range(1, pages + 1), desc=f"腾讯采集[{city}-{keyword}]"):
            params = {
                'keyword': keyword,
                'boundary': f'region({city},0)',
                'page_size': 20,
                'page_index': page,
                'key': self.config['qq_key']
            }
            try:
                resp = requests.get(base_url, params=params, headers=self.headers, timeout=10)
                data = resp.json()
                if data.get('status') == 0:
                    for poi in data.get('data', []):
                        results.append({
                            '来源': '腾讯地图',
                            '名称': poi.get('title'),
                            '地址': poi.get('address'),
                            '电话': poi.get('tel'),
                            '经纬度': f"{poi.get('location', {}).get('lat')},{poi.get('location', {}).get('lng')}"
                        })
            except Exception as e:
                print(f"腾讯采集异常: {e}")
            # Throttle between pages to stay under the API rate limit.
            time.sleep(0.5)
        return results

    def batch_scrape(self, keywords, cities, platforms=('amap', 'baidu', 'qq')):
        """Scrape every (platform, city, keyword) combination concurrently.

        Args:
            keywords: iterable of search keywords.
            cities: iterable of target city names.
            platforms: which platforms to query; any of 'amap', 'baidu', 'qq'.
                Default changed from a mutable list to a tuple to avoid the
                shared-mutable-default pitfall (call-compatible).

        Returns:
            list[dict]: the concatenated records from all tasks.
        """
        # Dispatch table avoids the repeated if/elif per task.
        scrapers = {
            'amap': self.scrape_amap,
            'baidu': self.scrape_baidu,
            'qq': self.scrape_qq,
        }
        all_data = []
        with ThreadPoolExecutor(max_workers=self.config.get('max_workers', 3)) as executor:
            futures = [
                executor.submit(scrapers[platform], keyword, city)
                for platform in platforms if platform in scrapers
                for city in cities
                for keyword in keywords
            ]
            for future in futures:
                all_data.extend(future.result())
        return all_data

    def save_to_excel(self, data, filename=None):
        """Deduplicate by phone number and write the records to an .xlsx file.

        Args:
            data: list of record dicts as produced by the scrape_* methods.
            filename: output file name; defaults to a timestamped name.

        Returns:
            str: full path of the written file.
        """
        os.makedirs(self.config['output_dir'], exist_ok=True)
        df = pd.DataFrame(data)
        # Guard: an empty scrape yields a DataFrame with no '电话' column,
        # which previously raised KeyError in drop_duplicates.
        if '电话' in df.columns:
            df = df.drop_duplicates(subset=['电话'], keep='first')
        if not filename:
            filename = f"商家电话_{time.strftime('%Y%m%d_%H%M%S')}.xlsx"
        save_path = os.path.join(self.config['output_dir'], filename)
        df.to_excel(save_path, index=False)
        print(f"数据已保存到: {save_path}")
        return save_path
if __name__ == '__main__':
    # BUG FIX: was `if name == 'main'` — the script-entry guard must compare
    # the dunder `__name__` against '__main__', otherwise this raised NameError.
    scraper = MapPhoneScraper()
    # Scrape parameters
    keywords = ['餐饮', '酒店', '超市']  # search keywords
    cities = ['北京', '上海', '广州']  # target cities
    # Run the multi-platform collection
    data = scraper.batch_scrape(keywords, cities)
    # Persist the deduplicated results to Excel
    scraper.save_to_excel(data)