1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
# Fetch the Douban home page using browser-like headers plus a session cookie,
# print the encoding Beautiful Soup detected, echo the page to the console as
# GB18030 bytes, and save a UTF-8 copy of the parsed document to title.txt.
#
# The script is written so the same source runs on Python 2 (urllib2) and on
# Python 3 (urllib.request); single-argument print(...) is valid in both.
import io
import sys
from contextlib import closing

from bs4 import BeautifulSoup

try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2; urllib.request provides Request and urlopen
    # under the same names, so it is bound to the name the script uses.
    import urllib.request as urllib2

url = 'https://www.douban.com'

# NOTE(review): this looks like a real logged-in browser session (dbcl2 / ck
# entries). It is kept verbatim so the request is unchanged, but a credential
# like this should not live in source control -- confirm it has been revoked
# and consider loading it from configuration instead.
cookie = 'll="118234"; __yadk_uid=FZYkMR92OctgDfVQxh7rgOvKAfSaAcF1; gr_user_id=30-b429-d8ac2b39f39e; _vwo_uuid_v2=62C802065BA1FE1E49689EB42248C9B5|86bc597a128b6ebcf16129a36961cd49; bid=Ogc8aq4tIsk; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1519356140%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DynTCvqw85IEmoWlag4b0hClM5qTjixjEN46Bbi_l7O1HuW1WreuRM_BxXp7M6Dyo%26wd%3D%26eqid%3Dd6c4a5f10001bb85000000025a8f88e8%22%5D; _pk_ses.100001.8cb4=*; __utma=30149280.832780041.1482799300.1517562754.1519356141.20; __utmc=30149280; __utmz=30149280.1519356141.20.19.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmt=1; _ga=GA1.2.832780041.1482799300; _gid=GA1.2.925059532.1519356149; _gat_UA-7019765-1=1; dbcl2="162182190:W4cfAVJjlD0"; ck=Pky-; _pk_id.100001.8cb4=599b9f4c8e87f346.1482799300.14.1519356150.1514105301.; push_noty_num=0; push_doumail_num=0; __utmv=30149280.16218; __utmb=30149280.3.10.1519356141'

# Request headers sent with the page fetch.
send_headers = {
    'Host': 'www.douban.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Connection': 'keep-alive',
    'Cookie': cookie,
}

req = urllib2.Request(url, headers=send_headers)

# closing() releases the HTTP response even if parsing raises; the Python 2
# response object is not a context manager on its own. The document is parsed
# inside the block, while the response is still open.
with closing(urllib2.urlopen(req)) as page:
    soup = BeautifulSoup(page, 'lxml')

print(soup.original_encoding)

# Echo the page as raw GB18030 bytes. On Python 3, print() of a bytes object
# would show its b'...' repr, so the bytes go to the binary layer of stdout
# when one exists; on Python 2 sys.stdout accepts the byte string directly,
# which matches what the print statement used to emit.
sys.stdout.flush()  # keep ordering with the print() output above
raw_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
raw_stdout.write(soup.encode('gb18030') + b'\n')
raw_stdout.flush()

# Write the document as text with an explicit UTF-8 encoding rather than
# relying on the interpreter/locale default; the with-block guarantees the
# handle is closed, and the name no longer shadows the Python 2 builtin `file`.
with io.open("title.txt", "w", encoding="utf-8") as out_file:
    out_file.write(soup.decode())

print('ok')
|
转载自阿飞的技术仓库
本文转自 Grodd 51CTO 博客,原文链接:http://blog.51cto.com/juispan/2072344,如需转载请自行联系原作者