python 模块 requests (2) 处理url
文章目录
1. 返回数据获取方式
response.text(解码后的字符串)、response.json()(将 JSON 响应体解析为 Python 对象)、response.content(原始字节 bytes)
2. request class 的格式
import re

import requests


class HandleLaGou(object):
    """Scrape data from lagou.com through a single shared HTTP session."""

    def __init__(self, timeout=10):
        """Create the session and the default request headers.

        Args:
            timeout: Seconds to wait for the server on each request before
                giving up. Without a timeout a stalled connection would
                block the script indefinitely.
        """
        self.laGou_session = requests.session()
        # Chrome-style User-Agent sent with every request.
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
        }
        self.timeout = timeout
        # Filled by handle_city(); starts as an empty list so the attribute
        # has the same type before and after the fetch.
        self.city_list = []

    # Fetch the list of all cities nationwide.
    def handle_city(self):
        """Download the all-cities page and store the city names in city_list."""
        # Captures the link text of every anchor whose href ends in "zhaopin/".
        city_search = re.compile(r'zhaopin/">(.*?)</a>')
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        self.city_list = city_search.findall(city_result)

    def handle_request(self, method, url, data=None, info=None):
        """Send a request through the shared session and return the body text.

        Args:
            method: HTTP verb, either "GET" or "POST".
            url: Address to request.
            data: Form payload, sent only with POST requests.
            info: Accepted for interface compatibility; currently unused.

        Returns:
            The decoded response body (response.text).

        Raises:
            ValueError: If method is neither "GET" nor "POST".
        """
        if method == "GET":
            response = self.laGou_session.get(
                url=url, headers=self.header, timeout=self.timeout
            )
        elif method == "POST":
            response = self.laGou_session.post(
                url=url, headers=self.header, data=data, timeout=self.timeout
            )
        else:
            # Fail here rather than returning None, which would only surface
            # later as a confusing TypeError in the caller.
            raise ValueError("unsupported HTTP method: %r" % (method,))
        return response.text


if __name__ == '__main__':
    laGou = HandleLaGou()
    laGou.handle_city()
    print(laGou.city_list)
3. 模拟浏览器请求
4. 返回列表包含的字典与元组
html内容
<tr > <td>60</td> <td><a>leeliao</a></td> <td><a>leeliao</a></td> <td>¥56.82 元</td> <td>¥56.82元</td> <td>2014-08-11 23:33:54</td> <td>手动</td> <td><div class='iconBidState0' /></td> </tr> <tr > <td>61</td> <td><a>luo321654</a></td> <td><a>luo321654</a></td> <td>¥4,000.00 元</td> <td>¥4,000.00元</td> <td>2014-08-11 23:34:32</td> <td>手动</td> <td><div class='iconBidState0' /></td> </tr>
代码
# Capture each row's leading number cell and the name inside the next <a> cell.
# re.S makes "." match newlines too, so ".*?" still bridges the two cells
# when they sit on separate lines.
r = re.compile(r'''<td>(?P<number>\d+)</td>.*?<td><a>(?P<name>\w+)</a></td''', re.S)
# finditer yields match objects (findall would return plain tuples), so
# groupdict() can turn every match into a {group name: text} dict.
result = r.finditer(content)
print([m.groupdict() for m in result])
输出结果:
[{'number': '60', 'name': 'leeliao'}, {'number': '61', 'name': 'luo321654'}]
✈推荐阅读:
python requests【1】入门
python re 正则表达式
python requests【2】高阶
w3schools Python Requests Module
runoob Python requests 模块
realpython Python’s Requests Library (Guide)
different-behavior-between-re-finditer-and-re-findall
re-findall-which-returns-a-dict-of-named-capturing-groups