"""Scrape yearly result tables from obaiwan.com into ``history.txt``.

For every year in 2003..2015 the script POSTs the year to ``url``, extracts
the nine captured cells from each table row matched by ``p`` and writes them
as one comma-separated line per row.
"""
import re

import requests
from lxml import etree  # not used below; kept so the import surface is unchanged

url = 'http://www.obaiwan.com/hk49/results/'

# One <tr> of the results table.  The literals are deliberately NOT raw
# strings: each '\r\n' is a real CR LF pair, so a row only matches when the
# page separates its cells with Windows line endings.  The pieces concatenate
# to exactly the original single-literal pattern.
# Captured cells (9): two plain cells, six bold cells, one final bold cell.
# NOTE(review): presumably issue/date, six drawn numbers and the special
# number -- confirm against the live page.
p = re.compile(
    '<tr >\r\n'
    '<td >.+?</td>\r\n'
    '<td >(.+?)</td>\r\n'
    '<td >(.+?)</td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td >.+?</td>\r\n'
    '<td ><b style=".+?">(.+?)</b></td>\r\n'
    '</tr>'
)

# Seconds to wait for the server before giving up; without a timeout a single
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

lines = []
for year in range(2003, 2016):
    # NOTE(review): the 'submit' value is already percent-encoded, and
    # requests form-encodes `data` again, so the '%' signs are escaped on the
    # wire.  Left untouched; confirm the server ignores this field.
    data = {'qinum': year, 'submit': '%CC%E1%BD%BB%B2%E9%D1%AF'}
    r = requests.post(url, data=data, timeout=REQUEST_TIMEOUT)
    r.encoding = 'gb2312'  # decode the body as GB2312 instead of the guessed charset
    for row in p.findall(r.text):
        lines.append(','.join(row) + '\n')

# Join once instead of growing a string with += inside the loops.
res = ''.join(lines)

# Open the output only after every request has succeeded, so a network
# failure no longer truncates an existing history.txt; `with` guarantees the
# handle is closed even if the write raises.
with open('history.txt', 'w') as f:
    f.write(res)