import ipaddress
import random

import pandas as pd
import requests
from lxml import etree
url = "http://ip.khcha.com/ipsection_view.aspx?fl=2&type=S_FJ"
page_text = requests.get(url=url)
html = etree.HTML(page_text.text)
ul_ui_list = html.xpath('//*[@class="navr"]/ul/li')
data_lst = []
for i in ul_ui_list:
if len(i.xpath('.//@href')) > 0:
new_url = "http://ip.khcha.com/" + i.xpath('.//@href')[0]
page_text_next = requests.get(url=new_url)
html_next = etree.HTML(page_text_next.text)
ul_ui_list_next = html_next.xpath('//*[@class="navr"]/ul/li')
for i_next in ul_ui_list_next:
if len(i_next.xpath('.//@href')) > 0:
new_url_next = "http://ip.khcha.com/" + i_next.xpath('.//@href')[0]
html_next_next = etree.HTML(requests.get(url=new_url_next).text)
data_list = html_next_next.xpath('//*[@class="mainright"]/ul/li')
for data in data_list:
parse_list = new_url_next.split('&')
province = '福建省'
city = parse_list[1].split('=')[1]
area = parse_list[2].split('=')[1]
l_ip = data.xpath('.//span[@class = "l"]/text()')[0]
r_ip = data.xpath('.//span[@class = "r"]/text()')[0]
lst = [province, city, area, l_ip, r_ip]
data_lst.append(lst)
else:
continue
else:
continue
columns = ['province','city','area','start_ip','end_ip']
df = pd.DataFrame(data_lst,columns=columns)
columns_new = ["province","city","area","ip"]
data_frame = pd.DataFrame(columns=columns_new)
print(len(df))
print("++++++++++++++++++++++++++++++++++++++")
for index,rows in df.iterrows():
start_ip = rows['start_ip']
end_ip = rows['end_ip']
pre_ip = '.'.join(start_ip.split('.')[0:2]) + '.'
ip_num = (int(end_ip.split('.')[2]) - int(start_ip.split('.')[2]) + 1) * (int(end_ip.split('.')[3]) - int(start_ip.split('.')[3]) + 1)
print(ip_num)
print("**********************************")
for i in range(ip_num // 10):
random_ip_3 = random.randint(int(start_ip.split('.')[2]), int(end_ip.split('.')[2]))
random_ip_4 = random.randint(int(start_ip.split('.')[3]), int(end_ip.split('.')[3]))
post_ip = str(random_ip_3) + '.' + str(random_ip_4)
new_ip = pre_ip + post_ip
combined_strings = [rows['province'], rows['city'], rows['area'], new_ip]
print(len(data_frame))
data_frame.loc[len(data_frame)] = combined_strings
print(data_frame.head())
print("===================================================")
excel_path = "ip_福建.xlsx"
data_frame.to_excel(excel_path, index=False)