基于Spark的大数据分析预测地震受灾情况的系统设计
在本篇博客中,我们将介绍如何使用Apache Spark框架进行地震受灾情况的预测。我们将结合数据分析、特征工程、模型训练和评估等步骤,最终建立一个预测模型来预测地震造成的破坏程度,同时使用可视化大屏的方式展示数据的分布。
1、数据来源和准备
我们使用了合并后的地震数据作为我们的数据集。首先,让我们来看一下我们的数据集:
# Load the merged earthquake CSV (header row present, column types inferred),
# then keep roughly a 10% sample drawn without replacement. A fixed seed is
# passed so the sampling can be repeated.
data = (
    spark.read
    .csv("../data_ana/merged_data.csv", header=True, inferSchema=True)
    .sample(withReplacement=False, fraction=0.1, seed=42)
)
data.show()
2、数据预处理和特征工程
在数据预处理和特征工程阶段,我们将对数据进行清洗、转换和特征提取等操作。具体步骤如下:
# Preprocessing and feature engineering.
#
# String-typed columns that are indexed, one-hot encoded and assembled into
# the "features" vector.
# NOTE(review): "damage_grade_x", "condition_post_eq" and the
# "technical_solution_proposed_*" columns look like post-earthquake
# assessments; if "damage_grade_x" mirrors the "damage_grade_y" column used
# as the training label, it leaks the target -- confirm against the merge
# that produced this CSV.
string_cols = [
    'gender_individual',
    'presence_in_household',
    'disability_individual',
    'education_level_individual',
    'marital_status_individual',
    'legal_ownership_status',
    'land_surface_condition',
    'foundation_type',
    'roof_type',
    'ground_floor_type',
    'other_floor_type',
    'position',
    'plan_configuration',
    'condition_post_eq',
    'damage_grade_x',
    'technical_solution_proposed_x',
    'area_assesed',
    'technical_solution_proposed_y',
    'vdcmun_name',
    'district_name',
]

# Derived column names: "<col>_index" after indexing, "<col>_encoded" after
# one-hot encoding.
index_cols = [name + "_index" for name in string_cols]
encoded_cols = [name + "_encoded" for name in string_cols]

# One StringIndexer per source column; handleInvalid="skip" makes the indexer
# filter out rows it cannot index instead of raising.
indexers = [
    StringIndexer(inputCol=source, outputCol=indexed, handleInvalid="skip")
    for source, indexed in zip(string_cols, index_cols)
]

# A single OneHotEncoder handles all indexed columns at once.
encoder = OneHotEncoder(inputCols=index_cols, outputCols=encoded_cols)

# Concatenate every encoded column into one feature vector.
assembler = VectorAssembler(inputCols=encoded_cols, outputCol="features")

# Run indexers -> encoder -> assembler as one pipeline, fitted on and applied
# to the same DataFrame.
pipeline = Pipeline(stages=indexers + [encoder, assembler])
data_final = pipeline.fit(data).transform(data)
data_final.show()
3、异常数据处理
在异常数据处理阶段,我们将处理可能存在的异常情况,确保数据的完整性和准确性:
# Derive an integer label from the text column "damage_grade_y": take the
# first run of digits in the value and cast it to int.
data_final = data_final.withColumn(
    "damage_grade_y_numeric",
    regexp_extract(data_final["damage_grade_y"], r"\d+", 0).cast("int"),
)

# A null source value stays null, and (under non-ANSI cast semantics) a value
# without any digits becomes "" from regexp_extract and then null after the
# cast. Drop those rows so that no null labels are passed on to model
# training.
data_final = data_final.na.drop(subset=["damage_grade_y_numeric"])

# Show the original text next to the parsed numeric label.
data_final.select("damage_grade_y", "damage_grade_y_numeric").show()
4、模型训练和评估
在模型训练和评估阶段,我们将使用随机森林分类器进行模型训练,并评估模型在测试集上的表现:
# Name of the integer label column shared by the classifier and the evaluator.
label_col = "damage_grade_y_numeric"

# Random split with weights 0.8 / 0.2 into training and test sets; the seed
# is fixed so the split can be repeated.
train_data, test_data = data_final.randomSplit([0.8, 0.2], seed=1234)

# Random forest classifier with 10 trees, reading the assembled "features"
# vector.
rf = RandomForestClassifier(
    labelCol=label_col,
    featuresCol="features",
    numTrees=10,
)

# Fit on the training split, then score the held-out split.
model = rf.fit(train_data)
predictions = model.transform(test_data)

# Accuracy of the "prediction" column against the label on the test split.
evaluator = MulticlassClassificationEvaluator(
    labelCol=label_col,
    predictionCol="prediction",
    metricName="accuracy",
)
accuracy = evaluator.evaluate(predictions)
print("Test Accuracy = {:.2f}%".format(accuracy * 100))
5、可视化大屏实现与展示
为了更直观地展示预测结果,我们设计了一个可视化大屏。该大屏将包括地图展示、受灾情况分布图以及预测结果展示等内容,以帮助用户更好地理解地震造成的破坏程度。
<html>
<head>
  <meta charset="utf-8">
  <title>www.husonghe.com</title>
  <style>
    html {
      height: 100%;
      background-image: -webkit-radial-gradient(ellipse farthest-corner at center center, #1b44e4 0%, #020f3a 100%);
      background-image: radial-gradient(ellipse farthest-corner at center center, #1b44e4 0%, #020f3a 100%);
      cursor: move;
    }

    body {
      width: 100%;
      margin: 0;
      overflow: hidden;
    }
  </style>
</head>
<body>
  <canvas id="canv" width="1920" height="572"></canvas>
  <script>
    // Full-window canvas animation: a cloud of glowing particles is moved in
    // 3D, projected through a simple camera and drawn as radial gradients.
    // Moving the pointer shifts the camera; pressing/touching adds particles.

    var num = 200;                 // number of particles created at start-up
    var w = window.innerWidth;     // current viewport width
    var h = window.innerHeight;    // current viewport height
    var max = 100;
    var _x = 0;                    // initial camera position
    var _y = 0;
    var _z = 150;

    // Degrees to radians.
    var dtr = function(d) {
      return d * Math.PI / 180;
    };

    // Sine of a random whole-degree angle, i.e. a value in [-1, 1].
    var rnd = function() {
      return Math.sin(Math.floor(Math.random() * 360) * Math.PI / 180);
    };

    // Euclidean distance between p1 and p2 (the third parameter is not used).
    var dist = function(p1, p2, p3) {
      return Math.sqrt(Math.pow(p2.x - p1.x, 2) + Math.pow(p2.y - p1.y, 2) + Math.pow(p2.z - p1.z, 2));
    };

    // Camera state: position (obj), look-at target (dest), the vector between
    // them (dist), cached sines/cosines derived from that vector (ang), and
    // the on-screen offset added after projection (disp).
    var cam = {
      obj: { x: _x, y: _y, z: _z },
      dest: { x: 0, y: 0, z: 1 },
      dist: { x: 0, y: 0, z: 200 },
      ang: { cplane: 0, splane: 0, ctheta: 0, stheta: 0 },
      zoom: 1,
      disp: { x: w / 2, y: h / 2, z: 0 },
      // Recompute dist from obj/dest and refresh the cached angle terms.
      upd: function() {
        cam.dist.x = cam.dest.x - cam.obj.x;
        cam.dist.y = cam.dest.y - cam.obj.y;
        cam.dist.z = cam.dest.z - cam.obj.z;
        cam.ang.cplane = -cam.dist.z / Math.sqrt(cam.dist.x * cam.dist.x + cam.dist.z * cam.dist.z);
        cam.ang.splane = cam.dist.x / Math.sqrt(cam.dist.x * cam.dist.x + cam.dist.z * cam.dist.z);
        cam.ang.ctheta = Math.sqrt(cam.dist.x * cam.dist.x + cam.dist.z * cam.dist.z) / Math.sqrt(cam.dist.x * cam.dist.x + cam.dist.y * cam.dist.y + cam.dist.z * cam.dist.z);
        cam.ang.stheta = -cam.dist.y / Math.sqrt(cam.dist.x * cam.dist.x + cam.dist.y * cam.dist.y + cam.dist.z * cam.dist.z);
      }
    };

    // Point transforms. Every function takes a point {x, y, z} and returns a
    // new object; steps() chains them in a fixed order.
    var trans = {
      parts: {
        // Per-axis scale.
        sz: function(p, sz) {
          return {
            x: p.x * sz.x,
            y: p.y * sz.y,
            z: p.z * sz.z
          };
        },
        // Rotations about the x, y and z axes; rot holds angles in degrees.
        rot: {
          x: function(p, rot) {
            return {
              x: p.x,
              y: p.y * Math.cos(dtr(rot.x)) - p.z * Math.sin(dtr(rot.x)),
              z: p.y * Math.sin(dtr(rot.x)) + p.z * Math.cos(dtr(rot.x))
            };
          },
          y: function(p, rot) {
            return {
              x: p.x * Math.cos(dtr(rot.y)) + p.z * Math.sin(dtr(rot.y)),
              y: p.y,
              z: -p.x * Math.sin(dtr(rot.y)) + p.z * Math.cos(dtr(rot.y))
            };
          },
          z: function(p, rot) {
            return {
              x: p.x * Math.cos(dtr(rot.z)) - p.y * Math.sin(dtr(rot.z)),
              y: p.x * Math.sin(dtr(rot.z)) + p.y * Math.cos(dtr(rot.z)),
              z: p.z
            };
          }
        },
        // Translation.
        pos: function(p, pos) {
          return {
            x: p.x + pos.x,
            y: p.y + pos.y,
            z: p.z + pos.z
          };
        }
      },
      // Camera-relative transforms using the terms cached in cam.ang.
      pov: {
        plane: function(p) {
          return {
            x: p.x * cam.ang.cplane + p.z * cam.ang.splane,
            y: p.y,
            z: p.x * -cam.ang.splane + p.z * cam.ang.cplane
          };
        },
        theta: function(p) {
          return {
            x: p.x,
            y: p.y * cam.ang.ctheta - p.z * cam.ang.stheta,
            z: p.y * cam.ang.stheta + p.z * cam.ang.ctheta
          };
        },
        set: function(p) {
          return {
            x: p.x - cam.obj.x,
            y: p.y - cam.obj.y,
            z: p.z - cam.obj.z
          };
        }
      },
      // Perspective divide; p is the scale factor cam.dist.z / z, later used
      // as the particle radius.
      persp: function(p) {
        return {
          x: p.x * cam.dist.z / p.z * cam.zoom,
          y: p.y * cam.dist.z / p.z * cam.zoom,
          z: p.z * cam.zoom,
          p: cam.dist.z / p.z
        };
      },
      // Shift into canvas coordinates (y is flipped).
      disp: function(p, disp) {
        return {
          x: p.x + disp.x,
          y: -p.y + disp.y,
          z: p.z + disp.z,
          p: p.p
        };
      },
      // Full chain: scale -> rotate x/y/z -> translate -> camera -> project
      // -> screen offset.
      steps: function(_obj_, sz, rot, pos, disp) {
        var _args = trans.parts.sz(_obj_, sz);
        _args = trans.parts.rot.x(_args, rot);
        _args = trans.parts.rot.y(_args, rot);
        _args = trans.parts.rot.z(_args, rot);
        _args = trans.parts.pos(_args, pos);
        _args = trans.pov.plane(_args);
        _args = trans.pov.theta(_args);
        _args = trans.pov.set(_args);
        _args = trans.persp(_args);
        _args = trans.disp(_args, disp);
        return _args;
      }
    };

    (function() {
      "use strict";

      // One particle: transIn holds its vertex, size, rotation and position;
      // transOut holds the projected result of the last vupd() call.
      var threeD = function(param) {
        this.transIn = {};
        this.transOut = {};
        this.transIn.vtx = (param.vtx);
        this.transIn.sz = (param.sz);
        this.transIn.rot = (param.rot);
        this.transIn.pos = (param.pos);
      };

      // Re-project the particle with the current camera.
      threeD.prototype.vupd = function() {
        this.transOut = trans.steps(
          this.transIn.vtx,
          this.transIn.sz,
          this.transIn.rot,
          this.transIn.pos,
          cam.disp
        );
      };

      // Scene controller: owns the canvas, the particle list and the
      // animation loop.
      var Build = function() {
        this.vel = 0.04;    // per-frame increment of each particle's angles
        this.lim = 360;     // angles wrap to 0 once they exceed this
        this.diff = 200;    // amplitude of the particle positions
        this.initPos = 100;
        this.toX = _x;      // camera target the pointer handlers write to
        this.toY = _y;
        this.go();
      };

      // Size the canvas to the window and create the initial particles.
      Build.prototype.go = function() {
        this.canvas = document.getElementById("canv");
        this.canvas.width = window.innerWidth;
        this.canvas.height = window.innerHeight;
        // Use the element looked up above rather than the implicit global
        // that browsers expose for elements with an id.
        this.$ = this.canvas.getContext("2d");
        this.$.globalCompositeOperation = 'source-over';
        this.varr = [];
        this.dist = [];
        this.calc = [];

        for (var i = 0, len = num; i < len; i++) {
          this.add();
        }

        this.rotObj = { x: 0, y: 0, z: 0 };
        this.objSz = { x: w / 5, y: h / 5, z: w / 5 };
      };

      // Append one particle with a random vertex and position, plus its
      // three random phase angles (degrees) in this.calc.
      Build.prototype.add = function() {
        this.varr.push(new threeD({
          vtx: { x: rnd(), y: rnd(), z: rnd() },
          sz: { x: 0, y: 0, z: 0 },
          rot: { x: 20, y: -20, z: 0 },
          pos: {
            x: this.diff * Math.sin(360 * Math.random() * Math.PI / 180),
            y: this.diff * Math.sin(360 * Math.random() * Math.PI / 180),
            z: this.diff * Math.sin(360 * Math.random() * Math.PI / 180)
          }
        }));
        this.calc.push({
          x: 360 * Math.random(),
          y: 360 * Math.random(),
          z: 360 * Math.random()
        });
      };

      // Ease the camera 5% of the remaining distance toward (toX, toY).
      Build.prototype.upd = function() {
        cam.obj.x += (this.toX - cam.obj.x) * 0.05;
        cam.obj.y += (this.toY - cam.obj.y) * 0.05;
      };

      // Render one frame: advance every particle, project it and paint it.
      Build.prototype.draw = function() {
        this.$.clearRect(0, 0, this.canvas.width, this.canvas.height);
        cam.upd();
        this.rotObj.x += 0.1;
        this.rotObj.y += 0.1;
        this.rotObj.z += 0.1;

        for (var i = 0; i < this.varr.length; i++) {
          var phase = this.calc[i];
          for (var val in phase) {
            if (phase.hasOwnProperty(val)) {
              phase[val] += this.vel;
              if (phase[val] > this.lim) phase[val] = 0;
            }
          }

          var particle = this.varr[i];
          particle.transIn.pos = {
            x: this.diff * Math.cos(phase.x * Math.PI / 180),
            y: this.diff * Math.sin(phase.y * Math.PI / 180),
            z: this.diff * Math.sin(phase.z * Math.PI / 180)
          };
          particle.transIn.rot = this.rotObj;
          particle.transIn.sz = this.objSz;
          particle.vupd();

          // Read transOut only after vupd(), which replaces the object.
          var out = particle.transOut;
          // A negative perspective factor is skipped instead of drawn.
          if (out.p < 0) continue;

          var g = this.$.createRadialGradient(out.x, out.y, out.p, out.x, out.y, out.p * 2);
          this.$.globalCompositeOperation = 'lighter';
          g.addColorStop(0, 'hsla(255, 255%, 255%, 1)');
          g.addColorStop(.5, 'hsla(' + (i + 2) + ',85%, 40%,1)');
          g.addColorStop(1, 'hsla(' + (i) + ',85%, 40%,.5)');
          this.$.fillStyle = g;
          this.$.beginPath();
          this.$.arc(out.x, out.y, out.p * 2, 0, Math.PI * 2, false);
          this.$.fill();
          this.$.closePath();
        }
      };

      // Start the frame loop, falling back to a ~60 fps timer when
      // requestAnimationFrame is unavailable.
      Build.prototype.anim = function() {
        window.requestAnimationFrame = (function() {
          return window.requestAnimationFrame ||
            function(callback, element) {
              window.setTimeout(callback, 1000 / 60);
            };
        })();
        var anim = function() {
          this.upd();
          this.draw();
          window.requestAnimationFrame(anim);
        }.bind(this);
        window.requestAnimationFrame(anim);
      };

      // Start animating and wire up pointer/touch interaction.
      Build.prototype.run = function() {
        this.anim();

        // Browsers may treat window-level touch listeners as passive by
        // default, in which case preventDefault() is ignored; ask for a
        // non-passive listener explicitly.
        var activeTouch = { passive: false };

        window.addEventListener('mousemove', function(e) {
          this.toX = (e.clientX - this.canvas.width / 2) * -0.8;
          this.toY = (e.clientY - this.canvas.height / 2) * 0.8;
        }.bind(this));

        window.addEventListener('touchmove', function(e) {
          e.preventDefault();
          this.toX = (e.touches[0].clientX - this.canvas.width / 2) * -0.8;
          this.toY = (e.touches[0].clientY - this.canvas.height / 2) * 0.8;
        }.bind(this), activeTouch);

        // Each press/touch adds another 100 particles.
        window.addEventListener('mousedown', function(e) {
          for (var i = 0; i < 100; i++) {
            this.add();
          }
        }.bind(this));

        window.addEventListener('touchstart', function(e) {
          e.preventDefault();
          for (var i = 0; i < 100; i++) {
            this.add();
          }
        }.bind(this), activeTouch);
      };

      var app = new Build();
      app.run();
    })();

    // Keep the canvas matched to the window. The element id is "canv"; it is
    // looked up here because no variable named `canvas` exists in this scope.
    window.addEventListener('resize', function() {
      var canvasEl = document.getElementById("canv");
      canvasEl.width = w = window.innerWidth;
      canvasEl.height = h = window.innerHeight;
      // Re-centre the projection on the resized canvas.
      cam.disp.x = w / 2;
      cam.disp.y = h / 2;
    }, false);
  </script>
</body>
</html>