最近练习爬虫，爬取全国各地市的邮编链接，与大家分享，希望起到抛砖引玉的作用。
源代码如下：
from urllib.parse import urljoin

import requests
from lxml import etree
# Fetch the yb21.cn home page and, for every <a> found under div positions
# 1 through 19 of the listing table, print three lines: the section's <h1>
# heading, the link text, and the absolute link URL.

# Browser-like User-Agent sent with the request.
headers = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
}
url = 'http://www.yb21.cn/'

# A timeout keeps the script from hanging forever if the server never answers.
res = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page and
# silently printing nothing.
res.raise_for_status()
# NOTE(review): decode() assumes the page is UTF-8 -- confirm against the
# charset the site actually serves.
tree = etree.HTML(res.content.decode())

for prov_number in range(1, 20):
    # XPath of the prov_number-th <div> inside the second table row.
    # presumably one <div> per province (going by the loop variable's name).
    section = f'//table/tbody/tr[2]/td/div[{prov_number}]'
    res1 = tree.xpath(f'{section}//a')
    if not res1:
        # No links at this position: nothing to print, and (as before) the
        # heading is not looked up.
        continue
    # The heading is the same for every link of the section, so it is looked
    # up once here instead of once per link.
    heading = tree.xpath(f'{section}/h1/text()')[0]
    for post in res1:
        print(heading)
        print(post.xpath('.//text()')[0])
        # urljoin resolves the href against the base URL, so root-relative,
        # document-relative and already-absolute hrefs all yield a valid URL
        # (plain string concatenation only worked for hrefs starting with '/').
        print(urljoin(url, post.xpath('./@href')[0]))