使用Hive Sqoop Django实现天气数据分析与可视化-开发者社区-阿里云

基于Hive的天气情况大数据分析系统（通过hive进行大数据分析将分析的数据通过sqoop导入到mysql，通过Django基于mysql的数据做可视化）

2024-06-12 859

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介： 基于Hive的天气情况大数据分析系统（通过hive进行大数据分析将分析的数据通过sqoop导入到mysql，通过Django基于mysql的数据做可视化）

基于Hive的天气情况大数据分析系统（通过hive进行大数据分析将分析的数据通过sqoop导入到mysql，通过Django基于mysql的数据做可视化）

1.Hive介绍：

Hive是建立在Hadoop之上的数据仓库基础架构，它提供了类似于SQL的语言（HQL），可以对大规模数据集进行查询和分析。通过Hive，我们可以在分布式存储系统中进行复杂的数据处理和分析。

2.Sqoop简介：

Sqoop是一个用于在Apache Hadoop和关系型数据库之间传输数据的工具。我们可以使用Sqoop将Hive中的分析结果导出到关系型数据库中，如MySQL，以便进一步处理和可视化。

3.Django概述：

Django是一个高级的Python Web框架，它提供了一系列工具和库，用于快速构建Web应用程序。我们可以利用Django连接到MySQL数据库，处理数据，并将其呈现为可视化界面。

Hive大数据分析sql，基于数据创建hive表，然后进行数据分析

-- 创建数据库
CREATE DATABASE IF NOT EXISTS big_data;

-- 切换到big_data数据库
USE big_data;

 load data local inpath '/export/server/28' INTO TABLE weather_data;
-- 创建weather_data表
CREATE TABLE IF NOT EXISTS weather_data (
    `date` STRING,
    high_temperature STRING,
    low_temperature STRING,
    weather STRING,
    wind_direction STRING,
    city STRING
)ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- 插入数据到weather_data表（示例数据）
INSERT INTO TABLE weather_data VALUES
(1, '2022-01-01 周六', '6°', '-7°', '晴', '西北风3级', '北京'),
(2, '2022-01-02 周日', '2°', '-7°', '多云', '南风2级', '北京');

-- 创建etl_weather_data表
CREATE TABLE IF NOT EXISTS etl_weather_data (
    `date` STRING,
    day_of_week STRING,
    high_temperature INT,
    low_temperature INT,
    weather STRING,
    wind_direction STRING,
    wind_speed STRING,
    city STRING
);

-- 插入数据到etl_weather_data表
INSERT INTO TABLE etl_weather_data
SELECT
    SUBSTR(`date`, 1, INSTR(`date`, ' ') - 1) AS `date`,
    SUBSTR(`date`, INSTR(`date`, ' ') + 1) AS day_of_week,
    CAST(SUBSTR(high_temperature, 1, INSTR(high_temperature, '°') - 1) AS INT) AS high_temperature,
    CAST(SUBSTR(low_temperature, 1, INSTR(low_temperature, '°') - 1) AS INT) AS low_temperature,
    weather,
    REGEXP_REPLACE(SUBSTR(wind_direction, 1, INSTR(wind_direction, '级') - 1), '[0-9]', '') AS wind_direction,
    SUBSTR(SUBSTR(wind_direction, INSTR(wind_direction, '风') + 1),1,1) AS wind_speed,
    city
FROM
    weather_data;

-- 1.统计一年中每个城市晴天个数的top10
CREATE TABLE IF NOT EXISTS top_sunny_cities (
    city STRING,
    sunny_days_count INT
);

INSERT INTO TABLE top_sunny_cities
SELECT
    city,
    COUNT(*) AS sunny_days_count
FROM
    etl_weather_data
WHERE
    weather LIKE '%晴%'
GROUP BY
    city
ORDER BY
    sunny_days_count DESC
LIMIT 10;


-- 2.统计北京一年中每个月的温差变化
CREATE TABLE IF NOT EXISTS monthly_max_temperature_difference (
    month_year STRING,
    max_temperature_difference INT
);

INSERT INTO TABLE monthly_max_temperature_difference
SELECT
    CONCAT(YEAR(`date`), '-', LPAD(MONTH(`date`), 2, '0')) AS month_year,
    MAX(high_temperature - low_temperature) AS max_temperature_difference
FROM
    etl_weather_data
WHERE
    city = '北京'
GROUP BY
    YEAR(`date`), MONTH(`date`);

-- 3.统计城市出现3级以上风速最多的10个城市
CREATE TABLE IF NOT EXISTS top_cities_high_wind (
    city STRING,
    high_wind_days_count INT
);

INSERT INTO TABLE top_cities_high_wind
SELECT
    city,
    COUNT(*) AS high_wind_days_count
FROM
    etl_weather_data
WHERE
    CAST(wind_speed AS INT) >= 3
GROUP BY
    city
ORDER BY
    high_wind_days_count DESC
LIMIT 10;

基于sqoop将数据导入到mysql中

sqoop export \
  --connect jdbc:mysql://192.168.138.1:3306/big_data \
  --username root --password '123456' \
  --table top_sunny_cities_sqoop \
  --export-dir /hive/warehouse/big_data.db/big_data.dbbig_data.db/top_sunny_cities \
  --input-fields-terminated-by '\001' \
  --input-lines-terminated-by '\n';

sqoop export \
  --connect jdbc:mysql:// 192.168.138.1:3306/big_data \
  --username root --password 123456 \
  --table monthly_max_temperature_difference \
  --export-dir /user/hive/warehouse/big_data.db/big_data.dbmonthly_max_temperature_difference \
  --input-fields-terminated-by '\001' \
  --input-lines-terminated-by '\n'

sqoop export \
  --connect jdbc:mysql:// 192.168.138.1:3306/big_data \
  --username root --password 123456 \
  --table top_cities_high_wind \
  --export-dir /user/hive/warehouse/big_data.db/big_data.dbtop_cities_high_wind \
  --input-fields-terminated-by '\001' \
  --input-lines-terminated-by '\n'

基于mysql数据使用Django做数据可视化

from django.contrib.auth.decorators import login_required
from django.http import HttpResponse
from django.shortcuts import render, redirect
from django.shortcuts import render
from pyecharts import options as opts
from pyecharts.charts import Line, Bar, Pie, Grid
# Create your views here.


from django.shortcuts import render
from pyecharts.globals import ThemeType

from api.service.task_service import get_user, top_sunny_cities, monthly_max_temperature_difference, \
    top_cities_high_wind, top_rainy_cities, monthly_rainy_days, yearly_min_temperatures, daily_wind_speed, \
    daily_temperature_difference, register_user


def login_page(request):
    if request.method == 'POST':
        username = request.POST.get('username')
        password = request.POST.get('password')
        user = get_user(username,password)
        if user is not None:
            return redirect('home')
        else:
            return render(request, 'login.html', {'error_message': 'Invalid login credentials.'})

    return render(request, 'login.html')

def register_view(request):
    # 处理注册逻辑
    if request.method == 'GET':
        username = request.GET.get('username')
        password = request.GET.get('password')
        if username and password:
            register_user(username,password)
            return HttpResponse("注册成功！")
        return render(request, 'register.html')  # 使用你的注册模板路径

def home(request):
    print(2)
    return render(request, 'home.html')

def data_analysis(request, button_id):
    return render(request, 'data_analysis.html', {'button_id': button_id})

def data_analysis(request, button_id):


    # 根据按钮 ID 进行不同的处理
    if button_id == 1:
        x,y = top_sunny_cities()
        line_chart = (
            Line()
            .add_xaxis(xaxis_data=x)
            .add_yaxis(series_name="晴天个数", y_axis=y)
            .set_global_opts(title_opts=opts.TitleOpts(title="一年中每个城市晴天个数的top10"))
        )
        chart_html = line_chart.render_embed()
        button_name = "折线图"
    elif button_id == 2:
        x,y = monthly_max_temperature_difference()
        line_chart = (
            Line()
            .add_xaxis(xaxis_data=x)
            .add_yaxis(series_name="温差值", y_axis=y)
            .set_global_opts(title_opts=opts.TitleOpts(title="北京一年中每个月的温差变化"))
        )
        chart_html = line_chart.render_embed()
        button_name = "折线图"
    elif button_id == 3:
        x,y = top_cities_high_wind()
        bar_chart = (
            Bar()
            .add_xaxis(xaxis_data=x)
            .add_yaxis(series_name="3级风速次数",y_axis=y)
            .set_global_opts(title_opts=opts.TitleOpts(title="出现3级以上风速的top10个城市"))
        )
        chart_html = bar_chart.render_embed()
        button_name = "条形图"
    elif button_id == 4:
        x, y = top_rainy_cities()
        bar_chart = (
            Bar()
            .add_xaxis(xaxis_data=x)
            .add_yaxis(series_name="雨天数量", y_axis=y)
            .set_global_opts(title_opts=opts.TitleOpts(title="多雨城市的top10"))
        )
        chart_html = bar_chart.render_embed()
        button_name = "条形图"

    elif button_id == 5:
        x, y = monthly_rainy_days()
        pie = Pie()
        pie.add("", list(zip(x, y)))
        pie.set_global_opts(title_opts={"text": "杭州每月雨天变化", "subtext": "2022年"},
                            legend_opts=opts.LegendOpts(orient="vertical", pos_right="right", pos_top="center"))
        chart_html = pie.render_embed()
        button_name = "饼图"

    elif button_id == 6:
        x, y = yearly_min_temperatures()
        line_chart = (
            Line()
            .add_xaxis(xaxis_data=x)
            .add_yaxis(series_name="温度", y_axis=y)
            .set_global_opts(title_opts=opts.TitleOpts(title="城市一年中最低的温度top10"))
        )
        chart_html = line_chart.render_embed()
        button_name = "折线图"

    elif button_id == 7:
        x,y=daily_temperature_difference()
        # 创建饼图
        pie = (
            Pie(init_opts=opts.InitOpts(width="800px", height="600px"))
            .add(
                series_name="南京10月份1～10号温差变化",
                data_pair=list(zip(x, y)),
                radius=["40%", "75%"],  # 设置内外半径，实现空心效果
                label_opts=opts.LabelOpts(is_show=True, position="inside"),
            )
            .set_global_opts(title_opts=opts.TitleOpts(title="南京10月份1～10号温差变化"),
                             legend_opts=opts.LegendOpts(orient="vertical", pos_right="right", pos_top="center"),
                             )
            .set_series_opts(  # 设置系列选项，调整 is_show 阈值
                label_opts=opts.LabelOpts(is_show=True)
            )
        )
        chart_html = pie.render_embed()
        button_name = "饼图"

    elif button_id == 8:
        x,y=daily_wind_speed()

        bar_chart = (
            Bar()
            .add_xaxis(xaxis_data=x)
            .add_yaxis(series_name="风速级别", y_axis=y)
            .set_global_opts(title_opts=opts.TitleOpts(title="南京10月份每天的风速变化"))
        )
        chart_html = bar_chart.render_embed()
        button_name = "条形图"

    return render(request, 'data_analysis.html', {'chart_html': chart_html, 'button_name': button_name})