代码:
# coding=utf-8
"""Scrape one rental-listing page and append its entries to ``kf.csv``.

Usage: python <this script> <listing-page-url>
"""
import csv
import os
import sys

import requests
from bs4 import BeautifulSoup

if sys.version_info[0] < 3:
    # Python 2 only: make implicit unicode -> str conversions (``str()`` on
    # parsed text, csv writing) use UTF-8 instead of ASCII. ``reload`` and
    # ``setdefaultencoding`` do not exist on Python 3, where text is already
    # unicode, so the hack is skipped there.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# NOTE: no custom request headers are configured; requests' defaults are used.

# Output file shared by info() and download().
CSV_PATH = 'kf.csv'

# Column names, in the same order as the rows built by download().
CSV_HEADER = ['title', 'house', 'area', 'address1', 'address2', 'price']

# CSS selector matching each listing <li>; field selectors extend it.
_ITEM = 'body > div.mainbox > div.main > div.content > div.listBox > ul > li'


def _clean(text):
    """Return *text* with every space and newline character removed."""
    return text.replace(' ', '').replace('\n', '')


def _open_csv_for_append(path):
    """Open *path* for appending in the mode ``csv.writer`` requires."""
    if sys.version_info[0] < 3:
        # Python 2's csv module writes bytes, so the file must be binary.
        return open(path, 'ab')
    # Python 3's csv module writes text; newline='' keeps the writer's own
    # line terminators from being translated a second time.
    return open(path, 'a', newline='', encoding='utf-8')


def download(url):
    """Fetch the page at *url* and append one CSV row per listing.

    The page is parsed with the ``lxml`` parser and five parallel element
    lists are selected (title link, room paragraph, first and second
    address links, price). They are combined with ``zip``, so only as many
    listings as the shortest list are written. Each row holds, in order:
    the title, the first and last space-separated pieces of the room text,
    the two address texts and the price text, all stripped of spaces and
    newlines.

    Rows are appended to ``CSV_PATH``; the file is left untouched when no
    listing is found.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')

    titles = soup.select(_ITEM + ' > div.des > h2 > a:nth-of-type(1)')
    houses = soup.select(_ITEM + ' > div.des > p.room')
    oneaddresss = soup.select(_ITEM + ' > div.des > p.add > a:nth-of-type(1)')
    twoaddresss = soup.select(_ITEM + ' > div.des > p.add > a:nth-of-type(2)')
    prices = soup.select(_ITEM + ' > div.listliright > div.money > b')

    rows = []
    for title, house, oneaddress, twoaddress, price in zip(
            titles, houses, oneaddresss, twoaddresss, prices):
        # The room paragraph carries two fields; the first and last
        # space-separated pieces fill the 'house' and 'area' columns.
        room_parts = house.get_text().split(' ')
        rows.append((
            _clean(str(title.string)),
            _clean(room_parts[0]),
            _clean(room_parts[-1]),
            _clean(oneaddress.get_text()),
            _clean(twoaddress.get_text()),
            _clean(price.get_text()),
        ))

    if not rows:
        return

    # Open the file once for the whole page; the context manager closes it
    # even if a write fails part-way through.
    with _open_csv_for_append(CSV_PATH) as csvfile:
        writer = csv.writer(csvfile)
        for row in rows:
            print('write one house')
            writer.writerow(row)


# Initialise the CSV file.
def info():
    """Write the header row to ``CSV_PATH`` if the file is missing or empty.

    The file is opened in append mode, so existing data is never truncated.
    When it already has content the header is not written again, which
    keeps repeated runs from leaving header lines in the middle of the data.
    """
    if os.path.exists(CSV_PATH) and os.path.getsize(CSV_PATH) > 0:
        return
    with _open_csv_for_append(CSV_PATH) as csvinfo:
        csv.writer(csvinfo).writerow(CSV_HEADER)


if __name__ == '__main__':
    # The target page must be supplied by the caller; the script itself
    # defines no URL.
    if len(sys.argv) != 2:
        sys.exit('usage: python {0} <listing-page-url>'.format(sys.argv[0]))
    info()
    download(sys.argv[1])