Download link: https://www.pan38.com/share.php?code=pvvmX Access code: 7889
This is a complete solution for collecting merchant information via the Amap (Gaode) Maps API, including the core implementation and supporting code. Note that everything here goes through the official public API rather than any cracked or reverse-engineered interface, so the collected data is lawful and compliant; all we add on top is batching.
Core feature modules:
Collects basic merchant information through the Amap Place API
Supports multi-page pagination and exception handling
Automatically retrieves longitude/latitude coordinates and detailed contact information
Data field descriptions:
Name (name), full address (address)
Longitude/latitude coordinates (location)
Contact phone numbers (tel / detail_tel)
Administrative division info (pname / cityname / adname)
Industry category (type)
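For orientation, a single record assembled by the code below looks roughly like this (all values here are made-up placeholders, not real API output):

# Hypothetical record for illustration only; every value is a placeholder
sample_record = {
    'name': '某餐饮店',                    # merchant name
    'address': '北京市朝阳区某路1号',       # full address
    'location': '116.481488,39.990464',    # "lng,lat" string as returned by the API
    'tel': '010-00000000',                 # phone from the text-search result
    'detail_tel': '010-00000000',          # phone from the detail endpoint
    'type': '餐饮服务;中餐厅;中餐厅',       # industry category path
    'pname': '北京市',                     # province
    'cityname': '北京市',                  # city
    'adname': '朝阳区'                     # district
}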
import requests
import pandas as pd
from tqdm import tqdm


class AmapScraper:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://restapi.amap.com/v3/place/text"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        }

    def fetch_poi_data(self, keywords, city=None, pages=10):
        """Fetch POI data, paging through the text-search endpoint."""
        all_data = []
        for page in tqdm(range(1, pages + 1)):
            params = {
                'key': self.api_key,
                'keywords': keywords,
                'city': city,
                'offset': 25,        # records per page (25 is the API maximum)
                'page': page,
                'extensions': 'all',
                'output': 'json'
            }
            try:
                response = requests.get(
                    self.base_url,
                    params=params,
                    headers=self.headers,
                    timeout=10
                )
                data = response.json()
                if data['status'] == '1':
                    pois = data.get('pois', [])
                    if not pois:     # no more results; stop paging early
                        break
                    all_data.extend(pois)
            except Exception as e:
                print(f"Error fetching page {page}: {str(e)}")
        return all_data

    def parse_poi_info(self, pois):
        """Query the detail endpoint for each POI and flatten the fields."""
        result = []
        for poi in tqdm(pois):
            detail_url = "https://restapi.amap.com/v3/place/detail"
            params = {
                'key': self.api_key,
                'id': poi['id'],
                'extensions': 'all'
            }
            try:
                resp = requests.get(detail_url, params=params, timeout=5)
                detail = resp.json()
                if detail['status'] == '1' and detail.get('pois'):
                    info = {
                        'name': poi['name'],
                        'address': poi['address'],
                        'location': poi['location'],
                        'tel': poi.get('tel', ''),
                        'detail_tel': detail['pois'][0].get('tel', ''),
                        'type': poi['type'],
                        'pname': poi['pname'],
                        'cityname': poi['cityname'],
                        'adname': poi['adname']
                    }
                    result.append(info)
            except Exception as e:
                print(f"Error parsing {poi['id']}: {str(e)}")
        return result

    def save_to_excel(self, data, filename):
        """Save the collected records to an Excel file."""
        df = pd.DataFrame(data)
        # Newer pandas versions removed the encoding argument from to_excel(),
        # so it is omitted here; openpyxl writes UTF-8 by default.
        df.to_excel(filename, index=False)
        print(f"Data saved to {filename}")


if __name__ == "__main__":
    # Replace with your own Amap API key
    scraper = AmapScraper(api_key="YOUR_AMAP_API_KEY")
    # Collect catering businesses (keyword "餐饮"; change as needed)
    pois = scraper.fetch_poi_data("餐饮", city="北京", pages=5)
    # Resolve detailed information
    detailed_data = scraper.parse_poi_info(pois)
    # Save the results
    scraper.save_to_excel(detailed_data, "amap_business_data.xlsx")
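The hard-coded defaults above can be moved into a simple INI configuration file, for example: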
[amap]
api_key = YOUR_AMAP_API_KEY
default_city = 北京
default_keywords = 餐饮
max_pages = 10
output_format = excel
timeout = 10
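A minimal sketch of loading that file with Python's standard configparser, assuming it is saved as config.ini next to the script (the filename is an assumption):

# Minimal sketch: read the INI settings above with the standard library.
# Assumes the file is saved as "config.ini" in the working directory.
import configparser

config = configparser.ConfigParser()
config.read('config.ini', encoding='utf-8')

amap = config['amap']
scraper = AmapScraper(api_key=amap['api_key'])
pois = scraper.fetch_poi_data(
    amap['default_keywords'],
    city=amap['default_city'],
    pages=amap.getint('max_pages')
)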
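Below is an enhanced version that adds concurrent page fetching with a thread pool, a shared requests.Session, and file-based logging: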
import concurrent.futures
import logging
from queue import Queue, Empty

import pandas as pd
import requests


class EnhancedAmapScraper:
    def __init__(self, api_key, max_workers=5):
        self.api_key = api_key
        self.base_url = "https://restapi.amap.com/v3/place/text"
        self.session = requests.Session()
        self.logger = self._setup_logger()
        self.max_workers = max_workers

    def _setup_logger(self):
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)
        if not logger.handlers:  # avoid duplicate handlers on re-instantiation
            handler = logging.FileHandler('amap_scraper.log')
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def _make_request(self, params):
        try:
            resp = self.session.get(
                self.base_url,
                params=params,
                timeout=10,
                headers={'User-Agent': 'Mozilla/5.0'}
            )
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            self.logger.error(f"Request failed: {str(e)}")
            return None

    def fetch_pois(self, keywords, city=None, pages=10):
        work_queue = Queue()
        result_queue = Queue()
        for page in range(1, pages + 1):
            work_queue.put({
                'keywords': keywords,
                'city': city,
                'page': page
            })
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self._worker, work_queue, result_queue)
                for _ in range(self.max_workers)
            ]
            concurrent.futures.wait(futures)
        return list(result_queue.queue)

    def _worker(self, work_queue, result_queue):
        while True:
            try:
                task = work_queue.get_nowait()
            except Empty:
                # Queue drained; calling task_done() here would raise ValueError,
                # so exit before the try/finally below.
                break
            try:
                params = {
                    'key': self.api_key,
                    'keywords': task['keywords'],
                    'city': task['city'],
                    'page': task['page'],
                    'offset': 20,
                    'extensions': 'all'
                }
                data = self._make_request(params)
                if data and data['status'] == '1':
                    for poi in data.get('pois', []):
                        result_queue.put(poi)
            except Exception as e:
                self.logger.error(f"Worker error: {str(e)}")
            finally:
                work_queue.task_done()

    def get_poi_details(self, poi_ids):
        detail_url = "https://restapi.amap.com/v3/place/detail"
        results = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self._get_single_detail, detail_url, poi_id)
                for poi_id in poi_ids
            ]
            for future in concurrent.futures.as_completed(futures):
                result = future.result()
                if result:
                    results.append(result)
        return results

    def _get_single_detail(self, url, poi_id):
        try:
            resp = self.session.get(
                url,
                params={
                    'key': self.api_key,
                    'id': poi_id,
                    'extensions': 'all'
                },
                timeout=5
            )
            data = resp.json()
            if data['status'] == '1' and data.get('pois'):
                poi = data['pois'][0]
                return {
                    'id': poi_id,
                    'name': poi['name'],
                    'tel': poi.get('tel', ''),
                    'address': poi['address'],
                    'location': poi['location'],
                    'photos': [p['url'] for p in poi.get('photos', [])]
                }
        except Exception as e:
            self.logger.error(f"Detail fetch error for {poi_id}: {str(e)}")
        return None


if __name__ == "__main__":
    scraper = EnhancedAmapScraper(api_key="YOUR_API_KEY")
    # Collect catering POIs
    pois = scraper.fetch_pois("餐饮", city="北京", pages=5)
    # Fetch detailed information
    poi_ids = [poi['id'] for poi in pois]
    details = scraper.get_poi_details(poi_ids[:50])  # limit to 50 records for testing
    # Save the results
    pd.DataFrame(details).to_excel("amap_details.xlsx", index=False)
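One practical caveat: the Amap Web API enforces per-key rate quotas, so an aggressive thread pool can start getting error statuses back. A minimal throttling sketch that paces requests across worker threads (the 0.2-second interval is an illustrative assumption, not a documented limit):

# Minimal throttling sketch; share one instance across all worker threads.
# The 0.2s minimum interval is an assumption, not a documented Amap quota.
import threading
import time

class Throttle:
    def __init__(self, min_interval=0.2):
        self.min_interval = min_interval
        self.lock = threading.Lock()
        self.last_call = 0.0

    def wait(self):
        # Block until at least min_interval has passed since the last request.
        with self.lock:
            elapsed = time.monotonic() - self.last_call
            if elapsed < self.min_interval:
                time.sleep(self.min_interval - elapsed)
            self.last_call = time.monotonic()

# Usage: call throttle.wait() at the top of _make_request()
# and _get_single_detail() before each HTTP request.
throttle = Throttle(min_interval=0.2)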