1.get请求方式:urllib.parse.quote()
这个方法就是可以将代码中的文字转换成unicode编码的方式
导包
import urllib.request
import urllib.parse
写固定的url,这个url之后还要和参数进行拼接的
url = "https://www.baidu.com/s?wd="
写UA
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"
}
==将汉字转变成unicode编码,之后将url进行拼接== 这个的好处就是不用在url中修改一长串字符了,只是需要修改文字内容就行了。
# 将周杰伦三个字编程unicode
name = urllib.parse.quote("周杰伦")
name的值就是下面的一长串,
拼接字符串url
url = url + name
请求对象的定制
这里要注意的是参数要是用键值对的形式,因为这个参数位置有问题,对不上号
request = urllib.request.Request(url = url , headers=headers)
python模拟浏览器发送请求,response作为响应结果
response = urllib.request.urlopen(request)
2.get请求方式:urllib.parse.urlencode()
这个解决的问题是参数过多的问题,总不能一个一个的拼接字符串吧。
先写一个基础的url地址
base_url = "https://www.baidu.com/s?"
之后写data,这个就是url后面需要拼接的参数,很多个参数,可以使用一个字典进行保存。
data = {
"wd":"周杰伦",
"sex":"男",
"location":"中国台湾省"
}
之后就通过方法将data转换成unicode编码,就是一长串的东西
new_data = urllib.parse.urlencode(data)
准备UA
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"
}
之后就是封装成请求对象,然后将它发送给服务器
# 请求对象的定制
request = urllib.request.Request(url = url , headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode("utf-8")
print(content)
3.post请求方式
有的请求方式是post请求,下面给出方案
导包
import urllib.request
import urllib.parse
之后就是写url地址
url = "https://fanyi.baidu.com/sug"
之后找到需要传递的参数
data = {
"kw":"spider"
}
下一步就是post的特殊之处。需要先编码之后再转换成按字节的类型
# post的请求的参数必须要进行编码 post的请求参数是不会直接拼接到ulr后面的
# post请求的参数必须要进行编码 -- 变成字节型
data = urllib.parse.urlencode(data).encode("utf-8")
之后就是封装成对象。然后进行发送请求了。
request = urllib.request.Request(url=url,data=data,headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode("utf-8")
print(content)
print(type(content))
# 字符串变成json对象
import json
obj = json.loads(content)
print(obj)
案例练习:百度详细翻译
找到这个东西
表单数据就是要发送的数据
请求头就是全部复制过来就行,这个最主要的是Cookie,这个属于一种反爬手段,所以需要用到Cookie。
整体代码如下
import urllib.request
import urllib.parse
url = "https://fanyi.baidu.com/v2transapi?from=en&to=zh"
headers = {
# 'Accept': '*/*',
# 'Accept-Encoding': 'gzip, deflate, br',
# 'Accept-Language': 'zh-CN,zh;q=0.9',
# 'Connection': 'keep-alive',
# 'Content-Length': '135',
# 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Cookie': 'BIDUPSID=DAA8F9F0BD801A2929D96D69CF7EBF50; PSTM=1597202227; BAIDUID=DAA8F9F0BD801A29B2813502000BF8E9:SL=0:NR=10:FG=1; __yjs_duid=1_c19765bd685fa6fa12c2853fc392f8db1618999058029; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=R2bEZvTjFCNHQxdUV-cTZ-MzZrSGxhbUYwSkRkUWk2SkxxS3E2M2lqaFRLUlJoRVFBQUFBJCQAAAAAAAAAAAEAAAA3e~BTveK-9sHLZGF5AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFOc7GBTnOxgaW; BDUSS_BFESS=R2bEZvTjFCNHQxdUV-cTZ-MzZrSGxhbUYwSkRkUWk2SkxxS3E2M2lqaFRLUlJoRVFBQUFBJCQAAAAAAAAAAAEAAAA3e~BTveK-9sHLZGF5AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFOc7GBTnOxgaW; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BAIDUID_BFESS=DAA8F9F0BD801A29B2813502000BF8E9:SL=0:NR=10:FG=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; PSINO=2; H_PS_PSSID=34435_31660_34405_34004_34073_34092_26350_34426_34323_22158_34390; delPer=1; BA_HECTOR=8185a12020018421b61gi6ka20q; BCLID=10943521300863382545; BDSFRCVID=boDOJexroG0YyvRHKn7hh7zlD_weG7bTDYLEOwXPsp3LGJLVJeC6EG0Pts1-dEu-EHtdogKK0mOTHv8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tR3aQ5rtKRTffjrnhPF3-44vXP6-hnjy3bRkX4Q4Wpv_Mnndjn6SQh4Wbttf5q3RymJ42-39LPO2hpRjyxv4y4Ldj4oxJpOJ-bCL0p5aHl51fbbvbURvD-ug3-7qqU5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE3-oJqC8hMIt43f; BCLID_BFESS=10943521300863382545; BDSFRCVID_BFESS=boDOJexroG0YyvRHKn7hh7zlD_weG7bTDYLEOwXPsp3LGJLVJeC6EG0Pts1-dEu-EHtdogKK0mOTHv8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tR3aQ5rtKRTffjrnhPF3-44vXP6-hnjy3bRkX4Q4Wpv_Mnndjn6SQh4Wbttf5q3RymJ42-39LPO2hpRjyxv4y4Ldj4oxJpOJ-bCL0p5aHl51fbbvbURvD-ug3-7qqU5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE3-oJqC8hMIt43f; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1629701482,1629702031,1629702343,1629704515; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1629704515; __yjs_st=2_MDBkZDdkNzg4YzYyZGU2NTM5NzBjZmQ0OTZiMWRmZGUxM2QwYzkwZTc2NTZmMmIxNDJkYzk4NzU1ZDUzN2U3Yjc4ZTJmYjE1YTUzMTljYWFkMWUwYmVmZGEzNmZjN2FlY2M3NDAzOThhZTY5NzI0MjVkMmQ0NWU3MWE1YTJmNGE5NDBhYjVlOWY3MTFiMWNjYTVhYWI0YThlMDVjODBkNWU2NjMwMzY2MjFhZDNkMzVhNGMzMGZkMWY2NjU5YzkxMDk3NTEzODJiZWUyMjEyYTk5YzY4ODUyYzNjZTJjMGM5MzhhMWE5YjU3NTM3NWZiOWQxNmU3MDVkODExYzFjN183XzliY2RhYjgz; ab_sr=1.0.1_ZTc2ZDFkMTU5ZTM0ZTM4MWVlNDU2MGEzYTM4MzZiY2I2MDIxNzY1Nzc1OWZjZGNiZWRhYjU5ZjYwZmNjMTE2ZjIzNmQxMTdiMzIzYTgzZjVjMTY0ZjM1YjMwZTdjMjhiNDRmN2QzMjMwNWRhZmUxYTJjZjZhNTViMGM2ODFlYjE5YTlmMWRjZDAwZGFmMDY4ZTFlNGJiZjU5YzE1MGIxN2FiYTU3NDgzZmI4MDdhMDM5NTQ0MjQxNDBiNzdhMDdl',
# 'Host': 'fanyi.baidu.com',
# 'Origin': 'https://fanyi.baidu.com',
# 'Referer': 'https://fanyi.baidu.com/?aldtype=16047',
# 'sec-ch-ua': '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
# 'sec-ch-ua-mobile': '?0',
# 'Sec-Fetch-Dest': 'empty',
# 'Sec-Fetch-Mode': 'cors',
# 'Sec-Fetch-Site': 'same-origin',
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
# 'X-Requested-With': 'XMLHttpRequest',
}
# 参数
data = {
'from':' en',
'to':' zh',
'query':' love',
'transtype':' realtime',
'simple_means_flag':' 3',
'sign':' 198772.518981',
'token':' a23a7b917d1d5908e8342bfc81987cab',
'domain':' common',
}
# post请求的参数,必须进行编码,并且调用encode方法
data = urllib.parse.urlencode(data).encode("utf-8")
request = urllib.request.Request(url=url,data=data,headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode("utf-8")
import json
obj = json.loads(content)
print(obj)
print(content)