""" * project_name stu * package * file_name pt_craw_zh * <p> * description * </p> * @author <a href="mailto:joshualwork@163.com">joshua_liu</a> * @date 2022/1/4 15:49"""importjsonimportpymongoimportrequestsfromjsonpath_rwimportparseurl='https://www.lagou.com/jobs/v2/positionAjax.json?first=true&needAddtionalResult=false&city=%E9%83%91%E5%B7%9E' \
'&px=new&pn={}&kd=Java'max_pg=13mongo_client=pymongo.MongoClient('localhost', 27017, username='db_python', password='**..!123')
db_list=mongo_client.list_database_names()
db=mongo_client.db_pythontb_lg_position=db.tb_lg_positionheaders= {
"Cache-Control": "no-cache",
"Host": "www.lagou.com",
"Accept": "application/json, text/plain, */*",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"x-l-req-header": "{deviceType:1}",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ""Chrome/96.0.4664.110 Safari/537.36"}
def query_json():
    """Fetch the listing pages one by one and store the results in MongoDB.

    Pages ``1`` through ``max_pg - 1`` are requested with POST from ``url``
    using the module-level ``headers``.  For each response, the value found
    at ``content.positionResult.result`` is printed and, when non-empty,
    inserted into the ``tb_lg_position`` collection.

    Takes no parameters and returns ``None``.

    @author <a href="mailto:joshualwork@163.com">joshua_liu</a>
    @date 2022/1/4 21:35
    """
    # The JSONPath expression is the same for every page, so build it once.
    positions_expr = parse("content.positionResult.result")

    for page in range(1, max_pg):
        page_url = url.format(page)
        print(page_url)
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.post(page_url, headers=headers, timeout=30)
        payload = json.loads(response.content.decode())
        for match in positions_expr.find(payload):
            positions = match.value
            print(positions)
            # insert_many() rejects an empty document list, so skip pages
            # that carry no results instead of crashing the whole crawl.
            if positions:
                tb_lg_position.insert_many(positions)
def main():
    """Script entry point: run the paginated crawl via query_json()."""
    query_json()
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()
""" * project_name stu * package * file_name pt_craw_zh * <p> * description * </p> * @author <a href="mailto:joshualwork@163.com">joshua_liu</a> * @date 2022/1/4 15:49"""importjsonimportpymongoimportrequestsfromjsonpath_rwimportparseurl='https://www.lagou.com/jobs/v2/positionAjax.json?first=true&needAddtionalResult=false&city=%E9%83%91%E5%B7%9E' \
'&px=new&pn={}&kd=Java'max_pg=13mongo_client=pymongo.MongoClient('localhost', 27017, username='db_python', password='**..!g1i2t3L')
db_list=mongo_client.list_database_names()
db=mongo_client.db_pythontb_lg_position=db.tb_lg_positionheaders= {
"Cache-Control": "no-cache",
"Host": "www.lagou.com",
"Accept": "application/json, text/plain, */*",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"x-l-req-header": "{deviceType:1}",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ""Chrome/96.0.4664.110 Safari/537.36"}
def query_json():
    """Fetch the listing pages one by one and store the results in MongoDB.

    Pages ``1`` through ``max_pg - 1`` are requested with POST from ``url``
    using the module-level ``headers``.  For each response, the value found
    at ``content.positionResult.result`` is printed and, when non-empty,
    inserted into the ``tb_lg_position`` collection.

    Takes no parameters and returns ``None``.

    @author <a href="mailto:joshualwork@163.com">joshua_liu</a>
    @date 2022/1/4 21:35
    """
    # The JSONPath expression is the same for every page, so build it once.
    positions_expr = parse("content.positionResult.result")

    for page in range(1, max_pg):
        page_url = url.format(page)
        print(page_url)
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.post(page_url, headers=headers, timeout=30)
        payload = json.loads(response.content.decode())
        for match in positions_expr.find(payload):
            positions = match.value
            print(positions)
            # insert_many() rejects an empty document list, so skip pages
            # that carry no results instead of crashing the whole crawl.
            if positions:
                tb_lg_position.insert_many(positions)
def main():
    """Script entry point: run the paginated crawl via query_json()."""
    query_json()
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()