import requests
import re # 此模块专门用来提取有效信息
# Page to scrape.
url = 'https://movie.douban.com/top250'

# Browser-like User-Agent header sent with the request.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/93.0.4577.82 Safari/537.36 '
}

# One match per <li> entry.  Named groups:
#   name     - text of the first <span class="title"> in the entry
#   year     - text between the first <br> inside <p class=""> and the next
#              "&nbsp;" entity
#   evaluate - text of the rating_num span
# re.S lets "." cross newlines, since a single entry spans many lines.
# NOTE(review): the year group assumes the page separates the year from the
# following field with "&nbsp;" entities -- confirm against the live markup.
obj = re.compile(
    r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp;'
    r'.*?<span class="rating_num" property="v:average">(?P<evaluate>.*?)</span>',
    re.S,
)


def fetch_page(page_url=url, headers=None, timeout=10):
    """Download *page_url* and return the response body as text.

    Args:
        page_url: Address to request; defaults to the module-level ``url``.
        headers: Request headers; the module-level ``head`` is used when None.
        timeout: Seconds to wait before giving up, so a stalled connection
            cannot hang the script forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    resp = requests.get(
        page_url,
        headers=head if headers is None else headers,
        timeout=timeout,
    )
    # Fail loudly instead of silently parsing an error page.
    resp.raise_for_status()
    return resp.text


def parse_movies(page_content):
    """Yield ``(name, year, evaluate)`` string tuples found in *page_content*.

    ``year`` and ``evaluate`` are stripped of surrounding whitespace; ``name``
    is returned exactly as captured.  Yields nothing when no entry matches.
    """
    for it in obj.finditer(page_content):
        yield (
            it.group('name'),
            it.group('year').strip(),
            it.group('evaluate').strip(),
        )


def main():
    """Fetch the page and print name, year and rating, one value per line."""
    for name, year, evaluate in parse_movies(fetch_page()):
        print(name)
        print(year)
        print(evaluate)


if __name__ == '__main__':
    main()