基于Hive的天气情况大数据分析系统(通过hive进行大数据分析将分析的数据通过sqoop导入到mysql,通过Django基于mysql的数据做可视化)
1.Hive介绍:
- Hive是建立在Hadoop之上的数据仓库基础架构,它提供了类似于SQL的语言(HQL),可以对大规模数据集进行查询和分析。通过Hive,我们可以在分布式存储系统中进行复杂的数据处理和分析。
2.Sqoop简介:
- Sqoop是一个用于在Apache Hadoop和关系型数据库之间传输数据的工具。我们可以使用Sqoop将Hive中的分析结果导出到关系型数据库中,如MySQL,以便进一步处理和可视化。
3.Django概述:
- Django是一个高级的Python Web框架,它提供了一系列工具和库,用于快速构建Web应用程序。我们可以利用Django连接到MySQL数据库,处理数据,并将其呈现为可视化界面。
Hive大数据分析sql,基于数据创建hive表,然后进行数据分析
-- 创建数据库 CREATE DATABASE IF NOT EXISTS big_data; -- 切换到big_data数据库 USE big_data; load data local inpath '/export/server/28' INTO TABLE weather_data; -- 创建weather_data表 CREATE TABLE IF NOT EXISTS weather_data ( `date` STRING, high_temperature STRING, low_temperature STRING, weather STRING, wind_direction STRING, city STRING )ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; -- 插入数据到weather_data表(示例数据) INSERT INTO TABLE weather_data VALUES (1, '2022-01-01 周六', '6°', '-7°', '晴', '西北风3级', '北京'), (2, '2022-01-02 周日', '2°', '-7°', '多云', '南风2级', '北京'); -- 创建etl_weather_data表 CREATE TABLE IF NOT EXISTS etl_weather_data ( `date` STRING, day_of_week STRING, high_temperature INT, low_temperature INT, weather STRING, wind_direction STRING, wind_speed STRING, city STRING ); -- 插入数据到etl_weather_data表 INSERT INTO TABLE etl_weather_data SELECT SUBSTR(`date`, 1, INSTR(`date`, ' ') - 1) AS `date`, SUBSTR(`date`, INSTR(`date`, ' ') + 1) AS day_of_week, CAST(SUBSTR(high_temperature, 1, INSTR(high_temperature, '°') - 1) AS INT) AS high_temperature, CAST(SUBSTR(low_temperature, 1, INSTR(low_temperature, '°') - 1) AS INT) AS low_temperature, weather, REGEXP_REPLACE(SUBSTR(wind_direction, 1, INSTR(wind_direction, '级') - 1), '[0-9]', '') AS wind_direction, SUBSTR(SUBSTR(wind_direction, INSTR(wind_direction, '风') + 1),1,1) AS wind_speed, city FROM weather_data; -- 1.统计一年中每个城市晴天个数的top10 CREATE TABLE IF NOT EXISTS top_sunny_cities ( city STRING, sunny_days_count INT ); INSERT INTO TABLE top_sunny_cities SELECT city, COUNT(*) AS sunny_days_count FROM etl_weather_data WHERE weather LIKE '%晴%' GROUP BY city ORDER BY sunny_days_count DESC LIMIT 10; -- 2.统计北京一年中每个月的温差变化 CREATE TABLE IF NOT EXISTS monthly_max_temperature_difference ( month_year STRING, max_temperature_difference INT ); INSERT INTO TABLE monthly_max_temperature_difference SELECT CONCAT(YEAR(`date`), '-', LPAD(MONTH(`date`), 2, '0')) AS month_year, MAX(high_temperature - low_temperature) AS max_temperature_difference FROM etl_weather_data WHERE city = '北京' GROUP BY YEAR(`date`), MONTH(`date`); -- 3.统计城市出现3级以上风速最多的10个城市 CREATE TABLE IF NOT EXISTS top_cities_high_wind ( city STRING, high_wind_days_count INT ); INSERT INTO TABLE top_cities_high_wind SELECT city, COUNT(*) AS high_wind_days_count FROM etl_weather_data WHERE CAST(wind_speed AS INT) >= 3 GROUP BY city ORDER BY high_wind_days_count DESC LIMIT 10;
基于sqoop将数据导入到mysql中
sqoop export \ --connect jdbc:mysql://192.168.138.1:3306/big_data \ --username root --password '123456' \ --table top_sunny_cities_sqoop \ --export-dir /hive/warehouse/big_data.db/big_data.dbbig_data.db/top_sunny_cities \ --input-fields-terminated-by '\001' \ --input-lines-terminated-by '\n'; sqoop export \ --connect jdbc:mysql:// 192.168.138.1:3306/big_data \ --username root --password 123456 \ --table monthly_max_temperature_difference \ --export-dir /user/hive/warehouse/big_data.db/big_data.dbmonthly_max_temperature_difference \ --input-fields-terminated-by '\001' \ --input-lines-terminated-by '\n' sqoop export \ --connect jdbc:mysql:// 192.168.138.1:3306/big_data \ --username root --password 123456 \ --table top_cities_high_wind \ --export-dir /user/hive/warehouse/big_data.db/big_data.dbtop_cities_high_wind \ --input-fields-terminated-by '\001' \ --input-lines-terminated-by '\n'
基于mysql数据使用Django做数据可视化
from django.contrib.auth.decorators import login_required from django.http import HttpResponse from django.shortcuts import render, redirect from django.shortcuts import render from pyecharts import options as opts from pyecharts.charts import Line, Bar, Pie, Grid # Create your views here. from django.shortcuts import render from pyecharts.globals import ThemeType from api.service.task_service import get_user, top_sunny_cities, monthly_max_temperature_difference, \ top_cities_high_wind, top_rainy_cities, monthly_rainy_days, yearly_min_temperatures, daily_wind_speed, \ daily_temperature_difference, register_user def login_page(request): if request.method == 'POST': username = request.POST.get('username') password = request.POST.get('password') user = get_user(username,password) if user is not None: return redirect('home') else: return render(request, 'login.html', {'error_message': 'Invalid login credentials.'}) return render(request, 'login.html') def register_view(request): # 处理注册逻辑 if request.method == 'GET': username = request.GET.get('username') password = request.GET.get('password') if username and password: register_user(username,password) return HttpResponse("注册成功!") return render(request, 'register.html') # 使用你的注册模板路径 def home(request): print(2) return render(request, 'home.html') def data_analysis(request, button_id): return render(request, 'data_analysis.html', {'button_id': button_id}) def data_analysis(request, button_id): # 根据按钮 ID 进行不同的处理 if button_id == 1: x,y = top_sunny_cities() line_chart = ( Line() .add_xaxis(xaxis_data=x) .add_yaxis(series_name="晴天个数", y_axis=y) .set_global_opts(title_opts=opts.TitleOpts(title="一年中每个城市晴天个数的top10")) ) chart_html = line_chart.render_embed() button_name = "折线图" elif button_id == 2: x,y = monthly_max_temperature_difference() line_chart = ( Line() .add_xaxis(xaxis_data=x) .add_yaxis(series_name="温差值", y_axis=y) .set_global_opts(title_opts=opts.TitleOpts(title="北京一年中每个月的温差变化")) ) chart_html = line_chart.render_embed() button_name = "折线图" elif button_id == 3: x,y = top_cities_high_wind() bar_chart = ( Bar() .add_xaxis(xaxis_data=x) .add_yaxis(series_name="3级风速次数",y_axis=y) .set_global_opts(title_opts=opts.TitleOpts(title="出现3级以上风速的top10个城市")) ) chart_html = bar_chart.render_embed() button_name = "条形图" elif button_id == 4: x, y = top_rainy_cities() bar_chart = ( Bar() .add_xaxis(xaxis_data=x) .add_yaxis(series_name="雨天数量", y_axis=y) .set_global_opts(title_opts=opts.TitleOpts(title="多雨城市的top10")) ) chart_html = bar_chart.render_embed() button_name = "条形图" elif button_id == 5: x, y = monthly_rainy_days() pie = Pie() pie.add("", list(zip(x, y))) pie.set_global_opts(title_opts={"text": "杭州每月雨天变化", "subtext": "2022年"}, legend_opts=opts.LegendOpts(orient="vertical", pos_right="right", pos_top="center")) chart_html = pie.render_embed() button_name = "饼图" elif button_id == 6: x, y = yearly_min_temperatures() line_chart = ( Line() .add_xaxis(xaxis_data=x) .add_yaxis(series_name="温度", y_axis=y) .set_global_opts(title_opts=opts.TitleOpts(title="城市一年中最低的温度top10")) ) chart_html = line_chart.render_embed() button_name = "折线图" elif button_id == 7: x,y=daily_temperature_difference() # 创建饼图 pie = ( Pie(init_opts=opts.InitOpts(width="800px", height="600px")) .add( series_name="南京10月份1~10号温差变化", data_pair=list(zip(x, y)), radius=["40%", "75%"], # 设置内外半径,实现空心效果 label_opts=opts.LabelOpts(is_show=True, position="inside"), ) .set_global_opts(title_opts=opts.TitleOpts(title="南京10月份1~10号温差变化"), legend_opts=opts.LegendOpts(orient="vertical", pos_right="right", pos_top="center"), ) .set_series_opts( # 设置系列选项,调整 is_show 阈值 label_opts=opts.LabelOpts(is_show=True) ) ) chart_html = pie.render_embed() button_name = "饼图" elif button_id == 8: x,y=daily_wind_speed() bar_chart = ( Bar() .add_xaxis(xaxis_data=x) .add_yaxis(series_name="风速级别", y_axis=y) .set_global_opts(title_opts=opts.TitleOpts(title="南京10月份每天的风速变化")) ) chart_html = bar_chart.render_embed() button_name = "条形图" return render(request, 'data_analysis.html', {'chart_html': chart_html, 'button_name': button_name})
展示Django项目运行结果