一、业务与环境概述
电商价格监控核心诉求为周期性采集 eBay 商品售价、留存价格快照、触发降价告警。eBay 页面以服务端渲染为主,无需解析 JS,但存在 URL 结构复杂、区间价、严格访问频率限制等问题;长期批量爬取品类数据时,单 IP 高频请求极易触发限流封禁。
本文基于 Python 实现一套完整 eBay 价格监控体系,涵盖商品检索、价格解析、数据持久化、价格异动检测、降价通知全链路,并集成亿牛云隧道代理规避 IP 封禁风险,代码可直接部署运行,仅作技术演示用途。
页面数据定位
依托浏览器开发者工具(F12)解析 DOM 节点,下表为当前可用 CSS 选择器(受平台 A/B 测试影响,Class 名称会动态变更,选择器失效时需重新校验):
表格
| 数据项 | CSS 选择器 |
| --- | --- |
| 商品标题 | `#itemTitle` / `h1.x-item-title__mainTitle` |
| 在售价格 | `.x-price-primary .ux-textspans` |
| 划线原价 | `.x-price-approx__price` |
| 运费 | `.ux-labels-values__values .ux-textspans` |
| 搜索列表项 | `.srp-results .s-item` |
| 列表页价格 | `.s-item__price` |
二、搜索列表采集与代理方案
eBay 对搜索接口管控严格,单 IP 连续请求短时间内即触发限流。方案采用亿牛云隧道代理,统一请求入口为t.16yun.cn:31111,由云端自动完成 IP 轮换,标准版 IP 池体量超 30 万,网络延迟低至 100ms。
代理接入核心逻辑
通过请求头Proxy-Tunnel传递随机数值实现 IP 动态切换,该方式相比 TCP 长连接模式稳定性更强,连接中断也可正常切换出口 IP。
python
运行
```import requests
import random
# Tunnel proxy base configuration (replace the placeholder credentials).
from urllib.parse import quote

proxyHost = "t.16yun.cn"
proxyPort = "31111"
proxyUser = "your_username"
proxyPass = "your_password"
# Percent-encode the credentials so that characters such as "@" or ":" in a
# real username/password cannot corrupt the proxy URL.
proxyMeta = (
    f"http://{quote(proxyUser, safe='')}:{quote(proxyPass, safe='')}"
    f"@{proxyHost}:{proxyPort}"
)
# The same HTTP proxy endpoint is used for both http:// and https:// targets.
proxies = {"http": proxyMeta, "https": proxyMeta}
搜索采集器完整实现
基于requests会话维持请求上下文,结合BeautifulSoup实现页面解析,通过数据类结构化存储商品信息,内置随机请求间隔降低风控概率:
python
运行
```from bs4 import BeautifulSoup
from urllib.parse import quote
from dataclasses import dataclass
from typing import Optional, List
import time
import random
import sqlite3
import hashlib
@dataclass
class Product:
    """Structured record for a single eBay listing.

    Every field has a default, so an instance can be built from whatever
    subset of data a page yields.  ``uid`` is a short derived identifier:
    when it is not supplied and ``ebay_id`` is non-empty, it is set to the
    first 12 hex characters of the MD5 digest of ``ebay_id``.
    """

    ebay_id: str = ""
    title: str = ""
    url: str = ""
    price: float = 0.0           # current selling price; 0.0 means "unknown"
    original_price: float = 0.0  # strike-through price; 0.0 means "unknown"
    currency: str = "USD"
    shipping: str = ""
    seller: str = ""
    condition: str = ""
    image_url: str = ""
    uid: str = ""

    def __post_init__(self):
        """Derive ``uid`` from ``ebay_id`` unless the caller provided one."""
        if not self.uid and self.ebay_id:
            # MD5 is used only to get a compact, stable key, not for security.
            self.uid = hashlib.md5(self.ebay_id.encode()).hexdigest()[:12]
class EbaySearchScraper:
    """Scrape eBay search-result pages, optionally through a tunnel proxy.

    A single ``requests.Session`` carries browser-like headers across
    requests.  When both proxy credentials are given, every request goes
    through the tunnel proxy and carries a random ``Proxy-Tunnel`` header
    value to request a fresh exit IP.

    NOTE(review): for ``https://`` targets ``requests`` tunnels through the
    proxy with CONNECT, so per-request headers are sent to the origin server
    rather than to the proxy -- confirm with the proxy provider that the
    ``Proxy-Tunnel`` header set this way is actually honoured.
    """

    BASE_URL = "https://www.ebay.com/sch/i.html"
    PROXY_ENDPOINT = "t.16yun.cn:31111"
    DEFAULT_TIMEOUT = 15  # seconds, used when the caller passes no timeout

    def __init__(self, proxy_user: str = "", proxy_pass: str = ""):
        """Create the session; enable the proxy only if both credentials are set.

        Args:
            proxy_user: Tunnel proxy username ("" disables the proxy).
            proxy_pass: Tunnel proxy password ("" disables the proxy).
        """
        self.session = requests.Session()
        # Present a regular desktop browser to the site.
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/125.0.0.0 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
        })
        self.use_proxy = bool(proxy_user and proxy_pass)
        self.proxies = None
        if self.use_proxy:
            # Percent-encode credentials so "@" or ":" cannot break the URL.
            proxy_meta = (
                f"http://{quote(proxy_user, safe='')}:{quote(proxy_pass, safe='')}"
                f"@{self.PROXY_ENDPOINT}"
            )
            self.proxies = {"http": proxy_meta, "https": proxy_meta}

    def _get(self, url, **kwargs):
        """Issue a GET through the session, rotating the exit IP if proxied.

        Extra keyword arguments are forwarded to ``Session.get``.  Caller
        supplied ``headers`` are merged with the ``Proxy-Tunnel`` header and a
        caller supplied ``timeout`` overrides ``DEFAULT_TIMEOUT``.
        """
        headers = dict(kwargs.pop("headers", None) or {})
        if self.use_proxy:
            headers["Proxy-Tunnel"] = str(random.randint(1, 10000))
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        return self.session.get(url, proxies=self.proxies, headers=headers, **kwargs)

    def search(self, keyword: str, max_pages: int = 3, delay: float = 3.0) -> List["Product"]:
        """Collect products from up to ``max_pages`` result pages.

        Pagination stops at the first failed request or non-200 response;
        products gathered so far are still returned (best effort).

        Args:
            keyword: Search phrase.
            max_pages: Maximum number of result pages to request.
            delay: Base pause in seconds after each page; 0.5-2.0 s of random
                jitter is added on top.

        Returns:
            The products parsed from all successfully fetched pages.
        """
        all_products = []
        for page in range(1, max_pages + 1):
            params = {"_nkw": keyword, "_pgn": page, "_ipg": 60}
            try:
                resp = self._get(self.BASE_URL, params=params)
                if resp.status_code != 200:
                    print(f"搜索页返回状态码 {resp.status_code},停止翻页")
                    break
                all_products.extend(self._parse_search(resp.text))
            except Exception as exc:  # best effort: keep what was collected
                print(f"搜索请求失败(第 {page} 页):{exc}")
                break
            # Pause after every page (including the last) so the caller's
            # next request is also spaced out.
            time.sleep(delay + random.uniform(0.5, 2.0))
        return all_products

    def _parse_search(self, html: str) -> List["Product"]:
        """Parse one search-result page into a list of products.

        Items that lack a title/link, an item id, or a positive price are
        skipped, as are items whose markup cannot be parsed.
        """
        soup = BeautifulSoup(html, "html.parser")
        products = []
        for item in soup.select(".srp-results .s-item"):
            try:
                product = self._parse_item(item)
            except (AttributeError, IndexError, TypeError, ValueError):
                continue  # malformed entry: skip it, keep the rest
            if product is not None:
                products.append(product)
        return products

    def _parse_item(self, item) -> Optional["Product"]:
        """Build a product from one result node, or ``None`` if unusable."""
        title_el = item.select_one(".s-item__title")
        link_el = item.select_one(".s-item__link")
        if title_el is None or link_el is None:
            return None
        url = link_el.get("href", "")
        ebay_id = self._extract_item_id(url)
        price = self._parse_price(item.select_one(".s-item__price"))
        if not ebay_id or price <= 0:
            return None
        shipping_el = item.select_one(".s-item__shipping")
        image_el = item.select_one(".s-item__image-img")
        return Product(
            ebay_id=ebay_id,
            title=title_el.get_text(strip=True),
            url=url,
            price=price,
            shipping=shipping_el.get_text(strip=True) if shipping_el else "",
            image_url=image_el.get("src", "") if image_el else "",
        )

    @staticmethod
    def _extract_item_id(url: str) -> str:
        """Return the last path segment after ``/itm/``, or "" if absent.

        Query string and fragment are dropped, so both ``/itm/<id>?...`` and
        ``/itm/<slug>/<id>?...`` yield ``<id>``.
        """
        if "/itm/" not in url:
            return ""
        path = url.split("/itm/", 1)[1]
        for sep in ("?", "#"):
            path = path.split(sep, 1)[0]
        return path.rstrip("/").rsplit("/", 1)[-1]

    @staticmethod
    def _parse_price(el) -> float:
        """Extract the first number from a price node's text.

        Currency markers ("$", "US $", "C $") and thousands separators are
        ignored; for a range such as "$10.00 to $20.00" the first value is
        used.  Returns 0.0 when ``el`` is ``None`` or holds no number.
        """
        import re  # local import keeps this snippet self-contained

        if el is None:
            return 0.0
        match = re.search(r"\d[\d,]*(?:\.\d+)?", el.get_text(strip=True))
        if match is None:
            return 0.0
        return float(match.group().replace(",", ""))
三、商品详情页数据解析
复用采集器的会话与代理配置,拉取详情页完整数据,提取原价、精准运费、卖家信息等维度数据:
python
运行
class EbayDetailScraper:
    """Enrich a product with data taken from its item detail page."""

    def __init__(self, scraper: "EbaySearchScraper"):
        """Reuse the search scraper's request helper (session + proxy setup).

        Args:
            scraper: Object exposing ``_get(url, **kwargs)``.
        """
        self._get = scraper._get

    def fetch_detail(self, product: "Product") -> "Product":
        """Fetch ``product.url`` and update the product in place.

        Updates price, original (strike-through) price, shipping text and
        seller name when the corresponding nodes are found.  This is best
        effort: on a non-200 response or any error the product is returned
        with whatever was filled in so far.

        Returns:
            The same ``product`` instance.
        """
        try:
            resp = self._get(product.url)
            if resp.status_code != 200:
                return product
            soup = BeautifulSoup(resp.text, "html.parser")

            # Current selling price; keep the existing value if parsing fails.
            price_el = soup.select_one(".x-price-primary .ux-textspans")
            if price_el:
                price = self._parse(price_el.get_text(strip=True))
                if price > 0:
                    product.price = price

            # Strike-through original price.
            orig_el = soup.select_one(".x-price-approx__price .ux-textspans--STRIKETHROUGH")
            if orig_el:
                product.original_price = self._parse(orig_el.get_text(strip=True))

            # Shipping: first label value mentioning "free" or a dollar amount.
            for sec in soup.select(".ux-labels-values__values-content .ux-textspans"):
                text = sec.get_text(strip=True).lower()
                if "free" in text or "$" in text:
                    product.shipping = text
                    break

            # Seller name.
            seller_el = soup.select_one(".x-seller-info__name")
            if seller_el:
                product.seller = seller_el.get_text(strip=True)
        except Exception as exc:  # best effort: never abort the whole run
            print(f"详情页采集失败({product.url}):{exc}")
        return product

    @staticmethod
    def _parse(text: str) -> float:
        """Return the first number in ``text`` as a float, or 0.0 if none.

        Currency prefixes such as "US $" and thousands separators are ignored.
        """
        import re  # local import keeps this snippet self-contained

        match = re.search(r"\d[\d,]*(?:\.\d+)?", text or "")
        return float(match.group().replace(",", "")) if match else 0.0
四、历史价格数据持久化
采用轻量级 SQLite 构建数据存储层,分商品基础表与价格历史表做数据隔离,建立索引提升查询效率,支持数据更新、历史记录写入、价格比对查询:
python
运行
class PriceDatabase:
    """SQLite store for product metadata and price snapshots.

    ``products`` holds one row per item; ``price_history`` holds one row per
    recorded snapshot.  Snapshot order is defined by the autoincrement ``id``
    because ``recorded_at`` only has one-second resolution.
    """

    def __init__(self, db_path: str = "ebay_prices.db"):
        """Open (or create) the database and make sure the schema exists.

        Args:
            db_path: SQLite file path; ``":memory:"`` gives a throwaway DB.
        """
        self.conn = sqlite3.connect(db_path)
        with self.conn:
            # Product metadata.
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS products (
                    ebay_id TEXT PRIMARY KEY,
                    title TEXT, url TEXT, image_url TEXT, seller TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
            # Price snapshots.
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS price_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    ebay_id TEXT, price REAL, original_price REAL, shipping TEXT,
                    recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (ebay_id) REFERENCES products(ebay_id)
                )
            """)
            self.conn.execute("CREATE INDEX IF NOT EXISTS idx_hist ON price_history(ebay_id)")

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

    def upsert_product(self, p: "Product"):
        """Insert the product, or refresh its metadata if it already exists.

        An existing row keeps its ``created_at``; an empty ``p.seller`` does
        not overwrite a seller stored earlier.
        """
        with self.conn:
            self.conn.execute(
                "INSERT OR IGNORE INTO products (ebay_id, title, url, image_url, seller) VALUES (?,?,?,?,?)",
                (p.ebay_id, p.title, p.url, p.image_url, p.seller),
            )
            self.conn.execute(
                "UPDATE products SET title=?, url=?, image_url=?, "
                "seller=COALESCE(NULLIF(?, ''), seller) WHERE ebay_id=?",
                (p.title, p.url, p.image_url, p.seller, p.ebay_id),
            )

    def record_price(self, p: "Product"):
        """Append a price snapshot for the product."""
        with self.conn:
            self.conn.execute(
                "INSERT INTO price_history (ebay_id, price, original_price, shipping) VALUES (?,?,?,?)",
                (p.ebay_id, p.price, p.original_price, p.shipping),
            )

    def get_latest(self, ebay_id: str) -> Optional[dict]:
        """Return the most recently recorded snapshot for ``ebay_id``.

        Returns:
            ``{"price": ..., "date": ...}`` or ``None`` if nothing is recorded.
        """
        row = self.conn.execute(
            # Order by id: recorded_at can tie within the same second.
            "SELECT price, recorded_at FROM price_history WHERE ebay_id=? ORDER BY id DESC LIMIT 1",
            (ebay_id,),
        ).fetchone()
        return {"price": row[0], "date": row[1]} if row else None

    def find_drops(self, threshold: float = 0.1) -> List[dict]:
        """List consecutive snapshot pairs whose price fell by >= ``threshold``.

        For every snapshot the immediately preceding snapshot of the same
        product (by ``id``) is compared with it; pairs whose relative drop
        reaches ``threshold`` are returned, largest drop first.

        Args:
            threshold: Minimum relative drop, e.g. ``0.1`` for 10 %.

        Returns:
            Dicts with keys ``ebay_id``, ``title``, ``old``, ``new``,
            ``drop_pct`` (percent, one decimal) and ``date``.
        """
        rows = self.conn.execute("""
            SELECT h1.ebay_id, p.title, h1.price AS old_price, h2.price AS new_price,
                   (h1.price - h2.price) / h1.price AS drop_ratio, h2.recorded_at
            FROM price_history h1
            JOIN price_history h2 ON h1.ebay_id = h2.ebay_id
            JOIN products p ON h1.ebay_id = p.ebay_id
            WHERE h1.id = (SELECT MAX(id) FROM price_history
                           WHERE ebay_id = h1.ebay_id AND id < h2.id)
              AND h1.price > 0
              AND (h1.price - h2.price) / h1.price >= ?
            ORDER BY drop_ratio DESC
        """, (threshold,)).fetchall()
        return [
            {
                "ebay_id": r[0], "title": r[1], "old": r[2], "new": r[3],
                "drop_pct": round(r[4] * 100, 1), "date": r[5],
            }
            for r in rows
        ]
五、价格异动检测与告警
对比当期价格与历史快照,设定降幅阈值触发告警,整合全模块实现任务调度:
class PriceAlert:
    """Detect price drops by comparing new prices with the stored history."""

    def __init__(self, db: "PriceDatabase", threshold: float = 0.05):
        """Bind the alert checker to a price store.

        Args:
            db: Store exposing ``upsert_product``, ``record_price`` and
                ``get_latest``.
            threshold: Minimum relative drop that triggers an alert
                (``0.05`` = 5 %).
        """
        self.db = db
        self.threshold = threshold

    def check(self, products: List["Product"]) -> List[dict]:
        """Persist the given products and report price drops.

        For each product the previously stored price is read first, then the
        product and its new snapshot are saved.  Products whose price is not
        positive are saved without a snapshot, so a failed parse does not
        replace the last valid baseline.

        Returns:
            One dict per drop of at least ``threshold`` with keys ``type``,
            ``ebay_id``, ``title``, ``old``, ``new``, ``drop`` (percent, one
            decimal) and ``url``.
        """
        alerts = []
        for p in products:
            # Read the baseline BEFORE recording, otherwise the "latest"
            # snapshot is the one just written and no drop is ever seen.
            previous = self.db.get_latest(p.ebay_id)
            self.db.upsert_product(p)
            if p.price <= 0:
                continue
            self.db.record_price(p)
            if not previous or previous["price"] <= 0:
                continue
            drop_rate = (previous["price"] - p.price) / previous["price"]
            if drop_rate >= self.threshold:
                alerts.append({
                    "type": "↓", "ebay_id": p.ebay_id, "title": p.title,
                    "old": previous["price"], "new": p.price,
                    "drop": round(drop_rate * 100, 1), "url": p.url,
                })
        return alerts
# Main entry point
def track(keywords: List[str], delay: float = 3.0, max_details: int = 10):
    """Run one monitoring pass over the given search keywords.

    For each keyword the search results are collected, the first
    ``max_details`` products are enriched from their detail pages, and all
    products are stored and checked for price drops.  A summary of the
    detected drops is printed at the end.

    Args:
        keywords: Search phrases to monitor.
        delay: Base pause in seconds between requests (random jitter is
            added on top).
        max_details: How many products per keyword get a detail-page fetch.
    """
    db = PriceDatabase()
    scraper = EbaySearchScraper(proxy_user="your_user", proxy_pass="your_password")
    detail = EbayDetailScraper(scraper)
    alert = PriceAlert(db)
    all_alerts = []
    try:
        for kw in keywords:
            print(f"\n=== 检索关键词:{kw} ===")
            products = scraper.search(kw, max_pages=2, delay=delay)
            top = products[:max_details]
            for idx, p in enumerate(top, start=1):
                # Show the real number of detail pages, not a fixed "10".
                print(f"解析详情 {idx}/{len(top)}:{p.title[:30]}...")
                detail.fetch_detail(p)
                time.sleep(delay + random.uniform(1, 3))
            all_alerts.extend(alert.check(products))
    finally:
        # Always release the SQLite connection, even if a keyword fails.
        db.conn.close()

    if all_alerts:
        print(f"\n{'='*50}\n检测到{len(all_alerts)}条价格变动:")
        for item in all_alerts:
            print(f"{item['type']} 降幅{item['drop']}% | ${item['old']:.2f} → ${item['new']:.2f} | {item['title'][:35]}")
    else:
        print("\n未检测到显著价格变动")
# Start the monitoring task when this file is executed as a script.
if __name__ == "__main__":
    track(["mechanical keyboard", "rtx 4070", "sony wh-1000xm5"])
```
六、代理选型与技术要点
- 代理方案选型
eBay 不同采集场景适配不同代理模式:
搜索列表 / 常规详情页:动态转发标准版,依托大规模 IP 池实现请求隔离,配合Proxy-Tunnel头部动态换 IP;
登录态 / 会话保持采集:固定转发版,短时锁定 IP 维持会话有效性。
IP 切换两种机制对比:
TCP Keep-Alive:复用会话连接,连接重建后自动换 IP;
Proxy-Tunnel 请求头:通过随机数值强制切换 IP,不受网络连接状态影响,本方案优先采用。
- 常见故障排查
搜索结果为空:请求头Accept-Language需配置为en-US,禁用原生requests默认 UA,匹配浏览器标识;
代理 407 错误:隧道代理账号、密码校验失败,核对平台凭据;
价格解析异常:存在多币种、区间价场景,需扩展清洗逻辑适配不同格式;
CSS 选择器失效:平台前端迭代更新 Class,需重新通过 F12 抓取节点;
- 方案边界与合规说明
合规约束:遵循 eBay robots.txt 规则,请求间隔不低于 3 秒,数据仅作个人价格分析,禁止商用分发;
场景限制:本方案为定时批处理模式,不支持秒杀类实时监控;部分动态渲染页面需改用 Playwright 等无头浏览器;
性能边界:批量高频轮询会提升风控概率,大规模监控建议拆分任务、分布式部署。