Python采集京东商品评论API接口示例
下面是一个使用 Python 采集京东商品评论的完整示例,包括 API 请求、JSON 数据解析和基础的反爬处理(模拟浏览器请求头、随机延迟)。
完整代码示例
import json
import random
import time
from urllib.parse import quote  # noqa: F401  (kept from the original example; currently unused)

# Endpoint queried for product comments; the response is a JSONP document.
COMMENT_API_URL = "https://club.jd.com/comment/productPageComments.action"

# Callback name sent to the API; the response body is wrapped as ``<callback>(...)``.
JSONP_CALLBACK = "fetchJSON_comment98"


def _strip_jsonp(text, callback=JSONP_CALLBACK):
    """Return the JSON payload contained in a JSONP response body.

    A fixed-width slice would silently corrupt the payload whenever the body
    does not end in exactly ``");"`` (trailing newline, missing semicolon,
    or no wrapper at all), so the wrapper is located explicitly instead.

    :param text: raw response body
    :param callback: JSONP callback name expected at the start of the body
    :return: the JSON text between the wrapper's parentheses, or the stripped
        body itself when it is not wrapped
    :raises ValueError: if the wrapper is opened but never closed
    """
    body = text.strip()
    prefix = f"{callback}("
    if not body.startswith(prefix):
        # Not wrapped: hand the body to json.loads unchanged and let it decide.
        return body
    closing = body.rfind(")")
    if closing < len(prefix):
        raise ValueError("JSONP response is missing its closing parenthesis")
    return body[len(prefix):closing]


def get_jd_comments(product_id, page: int = 0, page_size: int = 10):
    """
    Fetch one page of comments for a JD product.

    :param product_id: product ID
    :param page: page number (0-based)
    :param page_size: number of comments per page
    :return: decoded comment JSON, or None if the request or decoding failed
    """
    # Imported here so that the pure parsing helpers in this module remain
    # importable on machines without the third-party ``requests`` package.
    import requests

    # Query parameters
    params = {
        "callback": JSONP_CALLBACK,  # JSONP callback name
        "productId": product_id,
        "score": 0,  # 0=all, 1=negative, 2=neutral, 3=positive, 5=follow-up comments
        "sortType": 5,  # 5=recommended order, 6=ordered by time
        "page": page,
        "pageSize": page_size,
        "isShadowSku": 0,
        "fold": 1,  # 1=filter out short comments, 0=no filtering
    }

    # Request headers that imitate a browser visit
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": f"https://item.jd.com/{product_id}.html",
    }

    try:
        response = requests.get(
            COMMENT_API_URL, params=params, headers=headers, timeout=10
        )
        response.raise_for_status()
        # json.JSONDecodeError is a ValueError subclass, so it is covered below.
        comment_data = json.loads(_strip_jsonp(response.text))
    except (requests.RequestException, ValueError) as e:
        print(f"获取评论失败: {e}")
        return None

    # Random delay after a successful call so repeated requests are spaced out
    time.sleep(random.uniform(1, 3))
    return comment_data


def parse_comments(comment_data):
    """
    Convert a raw comment response into a list of flat dictionaries.

    The comment list is read from ``data.comments`` when a ``data`` object is
    present, otherwise from a top-level ``comments`` key. Fields that are
    missing or ``null`` become ``None`` (scalars) or ``[]`` (images/videos).

    :param comment_data: decoded response, as returned by get_jd_comments
    :return: list of structured comments; empty if the response is missing,
        not a dict, or its ``code`` is not ``"0"``
    """
    if not isinstance(comment_data, dict) or comment_data.get("code") != "0":
        return []

    payload = comment_data.get("data")
    comments = payload.get("comments") if isinstance(payload, dict) else None
    if comments is None:
        comments = comment_data.get("comments")

    result = []
    for comment in comments or []:
        # ``or`` guards against keys that are present but null in the JSON.
        images = comment.get("images") or []
        videos = comment.get("videos") or []
        after = comment.get("afterUserComment") or {}
        result.append(
            {
                "id": comment.get("id"),
                "content": comment.get("content"),
                "score": comment.get("score"),
                "creation_time": comment.get("creationTime"),
                "nickname": comment.get("nickname"),
                "product_color": comment.get("productColor"),
                "product_size": comment.get("productSize"),
                "useful_vote": comment.get("usefulVoteCount"),
                "images": [img.get("imgUrl") for img in images],
                "videos": [video.get("videoUrl") for video in videos],
                "after_comment": after.get("content"),
            }
        )
    return result


def main():
    """Fetch the first page of comments for a sample product and print a summary."""
    # Example product ID (iPhone 15)
    product_id = "100065474274"

    # Fetch the first page, 10 comments per page
    comment_data = get_jd_comments(product_id, page=0, page_size=10)
    if not comment_data:
        print("未能获取评论数据")
        return

    # Print the summary counters; tolerate a response without a "data" object
    stats = comment_data.get("data") or {}
    print(f"商品ID: {product_id}")
    print(f"总评论数: {stats.get('totalCount')}")
    print(f"好评数: {stats.get('goodCount')}")
    print(f"中评数: {stats.get('generalCount')}")
    print(f"差评数: {stats.get('poorCount')}")

    # Parse and print the comments
    comments = parse_comments(comment_data)
    print("\n评论示例:")
    for idx, comment in enumerate(comments[:3], 1):  # only the first 3 as a sample
        stars = "★" * (comment["score"] or 0)  # a missing score prints no stars
        print(f"\n评论{idx}:")
        print(f"用户: {comment['nickname']}")
        print(f"评分: {stars}")
        print(f"时间: {comment['creation_time']}")
        print(f"内容: {comment['content']}")
        if comment["images"]:
            print(f"图片: {len(comment['images'])}张")
        if comment["after_comment"]:
            print(f"追加评论: {comment['after_comment']}")


if __name__ == "__main__":
    main()
JSON数据示例
以下是京东API返回的典型JSON数据结构(已格式化):
json { "code": "0", "msg": "success", "data": { "productId": "100065474274", "score": 4.9, "totalCount": 50000, "goodCount": 48000, "generalCount": 1500, "poorCount": 500, "afterCount": 800, "videoCount": 200, "hotCommentTagStatistics": [ { "id": "1", "name": "物流快", "count": 12000 }, { "id": "2", "name": "正品保障", "count": 10000 } ], "comments": [ { "id": "123456789", "guid": "abcdef-12345", "content": "手机很好用,系统流畅,拍照清晰", "creationTime": "2023-10-15 10:30:22", "isTop": false, "referenceId": "987654321", "referenceName": "Apple iPhone 15", "score": 5, "status": 1, "title": "非常满意", "usefulVoteCount": 150, "uselessVoteCount": 2, "userImage": "misc.360buyimg.com/user/head/1.jpg", "userLevelId": "6", "userProvince": "北京", "nickname": "jd_123456789", "productColor": "黑色", "productSize": "256GB", "images": [ { "id": "111111", "imgUrl": "img10.360buyimg.com/n1/s450x450_jfs/t1/123456/1/2345/123456/5e8a1a1bE12345678/1234567890abcdef.jpg" } ], "afterUserComment": { "id": 888888, "content": "已解决您反馈的问题", "created": "2023-10-18 09:20:30" } } ] } }