import datetime
from urllib.parse import quote_plus
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from pymysql import connect
from sqlalchemy import create_engine,types
# Download the latest exchange-rate page and parse it into a BeautifulSoup tree.
# A browser-like User-Agent is sent along with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
url = 'https://www.shishihuilv.com/zuixin/'
response = requests.get(url=url, headers=headers, timeout=60)
# Fail fast on 4xx/5xx so an error page is never parsed as if it were rate data.
response.raise_for_status()
# requests falls back to ISO-8859-1 for text responses whose Content-Type
# carries no charset; in that case use the encoding detected from the body
# so non-Latin text is decoded correctly.
if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
    response.encoding = response.apparent_encoding
soup = BeautifulSoup(response.text, 'html.parser')
# Collect the text of every <span> on the page and reshape it into a DataFrame
# with one row per group of scraped values plus today's date.
span_tags = soup.find_all('span')
span_contents = [span.get_text(strip=True) for span in span_tags]
# The final <span> is discarded (presumably it is not part of the rate
# listing -- TODO confirm against the page markup).
span_contents = span_contents[0:-1]

columns = ['currency_for_exchange', 'currency_code', 'last_exchange_rate', 'reverse_exchange_rate', 'rate_date']
# Every column except rate_date comes from the page; rate_date is appended below.
fields_per_row = len(columns) - 1

# Validate up front: an empty or ragged scrape would otherwise surface as an
# opaque array_split / mismatched-columns error further down.
list_num, leftover = divmod(len(span_contents), fields_per_row)
if list_num == 0 or leftover:
    raise ValueError(
        'expected a non-empty multiple of {} <span> values, got {}'.format(
            fields_per_row, len(span_contents)
        )
    )

# Evaluate the date once so every row of this run carries the same rate_date.
today = datetime.date.today()
result = [
    span_contents[start:start + fields_per_row] + [today]
    for start in range(0, len(span_contents), fields_per_row)
]
# Build the frame in a single call instead of growing it one row at a time.
df = pd.DataFrame(result, columns=columns)
# Build the SQLAlchemy engine for the MySQL target (pymysql driver).
# NOTE(review): the user name and password are hard-coded here -- consider
# loading them from configuration or the environment.
# NOTE(review): host 127.0.0.0 may be intended as 127.0.0.1 -- confirm.
url_template = "mysql+pymysql://username:{}@127.0.0.0:3306/testdb"
# The password contains '@'; percent-encode it so it is not mistaken for the
# separator between credentials and host in the URL.
password = quote_plus('Abc@123')
connect_str = url_template.format(password)
engine = create_engine(connect_str)
# Explicit SQL column types for the target table, keyed by DataFrame column.
dtype = dict(
    currency_for_exchange=types.VARCHAR(64),
    currency_code=types.VARCHAR(32),
    last_exchange_rate=types.DECIMAL(10, 6),
    reverse_exchange_rate=types.DECIMAL(10, 6),
    rate_date=types.DATE,
)

# Write the frame to MySQL. if_exists='replace' drops and recreates the table
# on every run; the DataFrame index is not written; rows go in batches of 500.
df.to_sql(
    'ods_currency_exchange_rate_df',
    engine,
    if_exists='replace',
    index=False,
    dtype=dtype,
    chunksize=500,
)