一、概述
三次样条插值是一种采用分段函数的形式对数据进行插值的方法。为了从分段函数中选择一种可行的插值方法,需要考虑一些问题,比如:
- 插值函数是否处处连续
- 插值函数的一阶和二阶导数是否连续
- 插值函数的平滑度是否符合实际情况
三次样条插值通过使用三次多项式对每个小区间进行拟合,再通过选取适当的边界条件对插值函数的一阶和二阶导数进行约束,从而使得插值函数在插值节点处的一阶和二阶导数与原函数相等。三次样条插值方法得到的插值函数较为平滑,逼近精度较高,可以避免插值多项式过度拟合和震荡的问题。
二、数据集
- 数据预处理
library(ncdf4) ncdata <- nc_open("data.nc") lon <- ncvar_get(ncdata,'lon') # 经度 lat <- ncvar_get(ncdata,'lat') # 纬度 time <- ncvar_get(ncdata,'time') # 时间 lwe_thickness <- ncvar_get(ncdata,'lwe_thickness') # 覆盖厚度 # 这份测绘数据起始时间是2002.01.01,time的值表示过去的天数 time <- as.Date("2002-01-01") + time library(data.table) # 对数据进行重组和转置,以扁平化数据结构 lwe_thickness_flat <- as.data.table(lwe_thickness) # lon lwe_thickness_flat$V1 <- lon[lwe_thickness_flat$V1] # lat lwe_thickness_flat$V2 <- lat[lwe_thickness_flat$V2] # time lwe_thickness_flat$V3 <- time[lwe_thickness_flat$V3] # 将数据框lwe_thickness_flat转换为数据表对象 lwe_thickness_flat<-as.data.table(lwe_thickness_flat) # 设置列名 setnames(lwe_thickness_flat, c("V1","V2","V3","value"), c("lon","lat","time","lwe_thickness"))
- 获取缺失子集作为演示
library(dplyr) # 对数据进行日期筛选 sampled_data <- lwe_thickness_flat %>% filter(time < as.Date("2003-12-16"))
- 结果展示
lon lat time lwe_thickness 1: 0.125 -89.875 2002-04-18 -2.468668 2: 0.125 -89.875 2002-05-10 -1.569404 3: 0.125 -89.875 2002-08-16 -3.044039 4: 0.125 -89.875 2002-09-16 -3.201635 5: 0.125 -89.875 2002-10-16 -1.785586 --- 17625596: 359.875 89.875 2003-07-16 3.973456 17625597: 359.875 89.875 2003-08-16 3.146194 17625598: 359.875 89.875 2003-09-16 3.110446 17625599: 359.875 89.875 2003-10-16 4.867654 17625600: 359.875 89.875 2003-11-16 2.196712
四、填补缺失值
- 找出缺失的年月
time_sub <- unique(sampled_data$time) # 提取年月信息 year_month <- format(as.Date(time_sub), "%Y-%m") # 构造包含所有年月的数据框 start_date <- min(year_month) end_date <- max(year_month) all_dates <- seq(as.Date(paste0(start_date, "-01")), as.Date(paste0(end_date, "-01")), by = "month") all_year_month <- format(all_dates, "%Y-%m") df_all_dates <- data.frame(year_month = all_year_month, date = all_dates, stringsAsFactors = FALSE) # 找出缺失的日期 missing_dates <- df_all_dates[!df_all_dates$year_month %in% year_month, ] # 打印输出缺失的日期 print(missing_dates$year_month)
- 填充经度、纬度和时间
# 选取一个栅格的数据 grid_data <- lwe_thickness_flat %>% filter(time == as.Date("2002-04-18")) grid_data$lwe_thickness <- NA # 插入缺失年月的数据 for(i in 1:nrow(missing_dates)){ grid_data$time <- missing_dates$date[i] sampled_data <- rbind(sampled_data, grid_data) }
- 按照时间排序
sampled_data <- sampled_data[order(sampled_data$time)] # 检查排序结果 time_order <- unique(sampled_data$time) time_order
结果展示:
[1] "2002-04-18" "2002-05-10" "2002-06-01" "2002-07-01" "2002-08-16" [6] "2002-09-16" "2002-10-16" "2002-11-16" "2002-12-16" "2003-01-16" [11] "2003-02-15" "2003-03-16" "2003-04-16" "2003-05-11" "2003-06-01" [16] "2003-07-16" "2003-08-16" "2003-09-16" "2003-10-16" "2003-11-16"
缺失的三个月份补充进去了
- 三次样条插值
# 使用gam()函数进行三次样条插值 fit <- gam(lwe_thickness ~ s(lon, lat, time, bs = "cr"), data = dataset) # 预测插值结果 dataset$filled_thickness <- predict(fit, dataset) # 检查插值结果是否有缺失值 sum(is.na(dataset$filled_thickness))
- 三次样条插值
# 导入插值包 install.packages("rugarch") install.packages("zoo") library(rugarch) library(zoo) library(imputeTS) # 使用三次多项式插值填补缺失值 sampled_data$lwe_thickness_filled <- na.spline(sampled_data$lwe_thickness) # 过滤出缺失的数据,查看数据是否有插入 grid_data_filled <- sampled_data %>% filter(time == as.Date("2002-06-01")) # 写入文件并忽略行名 fwrite(sampled_data, file = "D:/log/grace.csv", row.names = FALSE)
结果展示:
lon lat time lwe_thickness lwe_thickness_filled 1: 0.125 -89.875 2002-06-01 NA -1.734405e+00 2: 0.125 -89.625 2002-06-01 NA -2.077785e+00 3: 0.125 -89.375 2002-06-01 NA -2.421165e+00 4: 0.125 -89.125 2002-06-01 NA -2.764544e+00 5: 0.125 -88.875 2002-06-01 NA -3.107923e+00 --- 1036796: 359.875 88.875 2002-06-01 NA -1.765162e+05 1036797: 359.875 89.125 2002-06-01 NA -1.765162e+05 1036798: 359.875 89.375 2002-06-01 NA -1.765162e+05 1036799: 359.875 89.625 2002-06-01 NA -1.765162e+05 1036800: 359.875 89.875 2002-06-01 NA -1.765162e+05
如果不想追加一列lwe_thickness_filled,可以直接赋值给lwe_thickness。我追加一列的目的是方便对比。
参考文献:
Using Satellite‑Based Terrestrial Water Storage Data: A Review