用Python一键抓取艺龙酒店各个城市数据并存入MySQL
前言:
在这篇文章中,我将分享如何利用Python轻松抓取艺龙API中的酒店数据。艺龙是一家知名的中国酒店预订平台,其接口提供了丰富的酒店信息,包括价格、评论等。我们将使用Python的Requests库发送HTTP请求,并直接解析接口返回的JSON响应,最后用PyMySQL将数据写入MySQL。
准备工作:
Python基础知识
HTTP请求和API的基本了解
在你的系统上安装Python
安装Requests库和PyMySQL库 (pip install requests pymysql)
脚本编写:
首先,让我们设置Python环境。创建一个新的Python文件(例如 yilong_scraper.py)并导入必要的库:
import json import time import requests
抓取酒店数据:
现在,让我们深入代码。我们首先定义一个名为 YiLongList 的类来处理抓取过程。这个类将有方法来从API获取酒店数据和解析JSON响应。
class YiLongList:
    """Collect hotel listings for one city from the eLong hotel list endpoint.

    Pages are requested one at a time; every hotel found is reduced to a small
    dict and accumulated in ``self.hotels_data`` until ``main`` hands the
    result back to the caller.

    Args:
        inDate: Check-in date string sent to the API as ``inDate``.
        outDate: Check-out date string sent to the API as ``outDate``.
        city: City code sent to the API as ``city``.
        headers: Optional HTTP headers for every request. When omitted a
            minimal browser-like ``User-Agent`` is used.
            NOTE(review): the endpoint may require extra headers or cookies;
            pass them here if requests are rejected.
    """

    LIST_URL = "https://hotel.elong.com/tapi/v2/list"
    MAX_PAGES = 5          # number of result pages fetched per city
    MAX_RETRIES = 3        # consecutive failures tolerated on a single page
    REQUEST_TIMEOUT = 10   # seconds; prevents a stalled connection from hanging the run
    DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0"}

    def __init__(self, inDate, outDate, city, headers=None):
        self.inDate, self.outDate = inDate, outDate
        self.city = city
        # One session per scraper so connections and headers are reused.
        self.session = requests.Session()
        self.session.headers.update(headers or self.DEFAULT_HEADERS)
        self.hotels_data = []

    def jxList(self, hotelList):
        """Extract the fields of interest from each hotel and store them.

        Args:
            hotelList: Iterable of hotel dicts taken from the API response.
        """
        for h in hotelList:
            # Extend this dict with further fields in the same way if needed.
            data = {
                'hotelName': h.get("hotelName"),
                'starLevelDes': h.get("starLevelDes"),
                'commentScore': h.get("commentScore"),
            }
            self.hotels_data.append(data)
            # Progress output (optional).
            print(data['hotelName'], data['starLevelDes'], data['commentScore'])

    def getList(self, page, traceToken):
        """Fetch one result page and store its hotels.

        Args:
            page: Zero-based page index.
            traceToken: Token returned by the previous page; it is only sent
                for pages after the first.

        Returns:
            The ``traceToken`` from the response when hotels were found,
            ``"have_slide"`` when the response contained no hotels, or
            ``None`` when the request failed or the response was malformed.
        """
        params = {
            "city": self.city,
            "inDate": self.inDate,
            "outDate": self.outDate,
            "filterList": "8888_1",
            "pageIndex": str(page),
            "pageSize": "20",
            "sugActInfo": "",
        }
        if page != 0:
            params["traceToken"] = traceToken

        try:
            response = self.session.get(
                self.LIST_URL, params=params, timeout=self.REQUEST_TIMEOUT
            )
            time.sleep(1)  # throttle: at most one request per second
            response.raise_for_status()
            resqJson = response.json()
            print(resqJson)
            hotelList = resqJson["data"]["hotelList"]
            traceToken = resqJson["data"]["traceToken"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # Network error, non-JSON body, or unexpected response layout.
            print(f"Request for page {page} failed: {exc!r}")
            return None

        if not hotelList:
            return "have_slide"
        self.jxList(hotelList)
        return traceToken

    def main(self):
        """Fetch up to ``MAX_PAGES`` pages and return the collected hotels.

        A page that fails is retried, but only ``MAX_RETRIES`` times in a row;
        after that the crawl stops instead of looping forever. The last valid
        ``traceToken`` is kept across failed attempts so a retry is sent with
        a real token rather than a failure sentinel.

        Returns:
            A list with the hotel dicts gathered during this run. The internal
            buffer is emptied afterwards so the instance can be reused.
        """
        traceToken = ""
        page = 0
        failures = 0
        while page < self.MAX_PAGES:
            result = self.getList(page, traceToken)
            if result and result != "have_slide":
                traceToken = result
                page += 1
                failures = 0
                continue
            failures += 1
            if failures >= self.MAX_RETRIES:
                print(f"Giving up on page {page} after {failures} failed attempts")
                break

        # Hand the rows to the caller, e.g. for request_data2mysql(rows, city),
        # then reset the buffer for the next run.
        collected = list(self.hotels_data)
        self.hotels_data.clear()
        return collected
- 运行脚本:
要运行脚本,我们需要为每个城市创建一个 YiLongList 实例并调用 main() 方法。
if __name__ == "__main__":
    # Stay dates sent to the API; the check-out date must follow the check-in date.
    inDate = "2024-03-09"
    outDate = "2024-03-10"

    # City code -> display name. Add more cities here if needed.
    city_dict = {
        "1701": "郑州",
        "0801": "大连",
        "2010": "惠州",
    }

    # Scrape each city with its own scraper instance.
    for city_code, city_name in city_dict.items():
        print(f"正在抓取 {city_name} 的数据...")
        yi = YiLongList(inDate, outDate, city_code)
        yi.main()
- 写入数据库代码
import pymysql

# Connection settings for the demo database.
# NOTE(review): credentials are hard-coded for the tutorial; load them from the
# environment or a config file in any real deployment.
config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': '12345678',
    'db': 'mydb',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor
}

# Columns of the ``hotels`` table, in insert order.
_HOTEL_COLUMNS = (
    'hotelName', 'starLevelDes', 'commentCount', 'price', 'commentScore',
    'commentScoreDes', 'areaName', 'hotelTags', 'themeList', 'recallReason',
    'commentMainTag', 'hotelAddress', 'trafficInfo', 'city',
)

# Built once from the column list so names and placeholders cannot drift apart.
_INSERT_HOTEL_SQL = "INSERT INTO hotels ({}) VALUES ({})".format(
    ", ".join(_HOTEL_COLUMNS),
    ", ".join(f"%({column})s" for column in _HOTEL_COLUMNS),
)


def request_data2mysql(datas, city):
    """Insert hotel records into the ``hotels`` table.

    Each record is inserted on a best-effort basis: a row that fails is
    reported together with the error and skipped, and the remaining rows are
    still written. All successful inserts are committed together at the end.

    Args:
        datas: Iterable of dicts keyed by column name. Columns absent from a
            dict are inserted as NULL, so partially populated records can
            still be stored. The dicts themselves are not modified.
        city: Value stored in the ``city`` column for every row; it overrides
            any ``city`` key present in a record.

    Raises:
        pymysql.MySQLError: If connecting or committing fails.
    """
    connection = pymysql.connect(**config)
    try:
        with connection.cursor() as cursor:
            for data in datas:
                try:
                    row = {column: data.get(column) for column in _HOTEL_COLUMNS}
                    row['city'] = city
                    cursor.execute(_INSERT_HOTEL_SQL, row)
                except Exception as exc:
                    # Best effort: report the bad row and keep going.
                    print(f"Failed to insert row ({exc!r}):")
                    print(data)
            # Commit the transaction.
            connection.commit()
            print("Data inserted successfully!")
    finally:
        # Always release the connection, even when an error propagates.
        if connection.open:
            connection.close()
            print("Connection closed.")


if __name__ == '__main__':
    # Sample record for a manual smoke test.
    data = [{
        'hotelName': 'Example Hotel',
        'starLevelDes': '5 Star Luxury',
        'commentCount': 100,
        'price': 200.0,
        'commentScore': 4.5,
        'commentScoreDes': 'Excellent',
        'areaName': 'Downtown',
        'hotelTags': 'Luxury, Spa, Pool',
        'themeList': 'Modern, Business',
        'recallReason': 'Great service and location',
        'commentMainTag': 'Cleanliness',
        'hotelAddress': '123 Example Street',
        'trafficInfo': 'Close to subway station',
        'city': 'beijing'
    }]
    # The function requires the city explicitly.
    request_data2mysql(data, 'beijing')
在本文中,我们学习了如何使用Python从艺龙API中抓取酒店数据。我们创建了一个抓取器类,从API获取数据并提取相关信息。这些数据可以进一步处理或存储在数据库中进行分析或其他用途。
- 运行结果