以下代码均经过测试,可直接运行
Oracle分析函数、多维函数和Model函数简要说明,主要针对BI报表统计,不一定很全面,但对BI应用场景做了少许说明
--创建一张销售数量表,数据趋势是递增的
CREATE TABLE ComputerSales AS
SELECT
120+TRUNC(rn/12)+ROUND(DBMS_RANDOM.VALUE(1,10)) SalesNumber
FROM
(
SELECT level,ROWNUM rn
FROM DUAL
CONNECT BY ROWNUM<=120
);
--下面用于比较NULL值和非NULL值的统计,可以看出NULL值情况下的COUNT是存在问题的,所以建议数据库系统中最好不要使用NULL值列
SELECT
COUNT(*),
COUNT(a.SalesNumber),
COUNT(DISTINCT a.SalesNumber),
SUM(a.SalesNumber),
AVG(a.SalesNumber),
MAX(a.SalesNumber),
MIN(a.SalesNumber)
FROM ComputerSales A;
DELETE FROM ComputerSales WHERE SalesNumber IS NULL;
COMMIT;
INSERT INTO ComputerSales VALUES(NULL);
COMMIT;
INSERT INTO ComputerSales VALUES(NULL);
COMMIT;
SELECT
COUNT(*),
COUNT(a.SalesNumber),
COUNT(DISTINCT a.SalesNumber),
SUM(a.SalesNumber),
AVG(a.SalesNumber),
MAX(a.SalesNumber),
MIN(a.SalesNumber)
FROM ComputerSales A;
SELECT trunc(dbms_random.value(1,101)),
DELETE FROM ComputerSales WHERE SalesNumber IS NULL;
COMMIT;
--创建增加了日期字段的表
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,TRUNC(SYSDATE)+MOD(A.DateSEQ-1,10) SalesDate
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
--下面是两种创建方式,构招Area列和日期列
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,
CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'
WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'
WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'
WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'
ELSE '其他地区'
END
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
--该例可构造SalesDate和Area的重复数据
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,
TRUNC(SYSDATE)+MOD(A.DateSEQ-1,10) SalesDate,
CASE WHEN AreaSEQ=1 THEN '华南地区'
WHEN AreaSEQ=2 THEN '华北地区'
WHEN AreaSEQ=3 THEN '东北地区'
WHEN AreaSEQ=4 THEN '华东地区'
ELSE '其他地区'
END
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ,ROUND(dbms_random.VALUE(1,5)) AreaSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
--移动平均值,累计求和,当前窗口平均值,当前窗口求和,以及窗口函数和排序函数的作用域
SELECT
Area,SalesDate,SalesNumber,
MIN(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS min_Area_SalesDate,
MAX(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS max_Area_SalesDate,
AVG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS avg_Area_SalesDate,
SUM(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS sum_Area_SalesDate,
COUNT(*) OVER (PARTITION BY Area ORDER BY SalesDate) AS count_Area,
MIN(SalesNumber) OVER (PARTITION BY Area) AS min_Area,
MAX(SalesNumber) OVER (PARTITION BY Area) AS max_Area,
AVG(SalesNumber) OVER (PARTITION BY Area) AS avg_Area,
SUM(SalesNumber) OVER (PARTITION BY Area) AS sum_Area,
COUNT(*) OVER (PARTITION BY Area) AS count_Area
FROM ComputerSales
--观察Rank、Dense_Rank,Row_number,Count的区别
--Rank跳号,Dense_Rank不跳号,Row_number唯一,Count按统计数计也跳号
--如果PARTITION BY和order by 的字段是唯一的话,则这四个函数没什么区别
SELECT
Area,SalesDate,SalesNumber,
RANK() OVER (PARTITION BY Area order by SalesNumber) AS Rank_Area_SalesNumber,
DENSE_RANK() OVER (PARTITION BY Area order by SalesNumber) AS DenseRank_Area_SalesNumber,
ROW_NUMBER() OVER (PARTITION BY Area order by SalesNumber) AS Rownumber_Area_SalesNumber,
COUNT(*) OVER (PARTITION BY Area order by SalesNumber) AS CountAll_Area_SalesNumber,
COUNT(SalesNumber) OVER (PARTITION BY Area order by SalesNumber) AS Count_Area_SalesNumber
FROM ComputerSales
--观察Lag和Lead的异同,以及Lag参数之间的异同
--缺省情况下Lag取前一行的值,Lead取后一行的值
--Lag、lead的第一个参数决定了取行的位置,第二个参数为取不到值时的缺省值
SELECT
Area,SalesDate,SalesNumber,
LAG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lag_Area_SalesNumber,
LEAD(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lead_Area_SalesNumber,
LAG(SalesNumber,1) OVER (PARTITION BY Area order by SalesDate) AS Lag1_Area_SalesNumber,
LAG(SalesNumber,2) OVER (PARTITION BY Area order by SalesDate) AS Lag2_Area_SalesNumber,
LEAD(SalesNumber,1) OVER (PARTITION BY Area order by SalesDate) AS Lead1_Area_SalesNumber,
LEAD(SalesNumber,2) OVER (PARTITION BY Area order by SalesDate) AS Lead2_Area_SalesNumber,
LAG(SalesNumber,1,0) OVER (PARTITION BY Area order by SalesDate) AS Lag10_Area_SalesNumber,
LAG(SalesNumber,2,1) OVER (PARTITION BY Area order by SalesDate) AS Lag21_Area_SalesNumber,
LEAD(SalesNumber,1,0) OVER (PARTITION BY Area order by SalesDate) AS Lead10_Area_SalesNumber,
LEAD(SalesNumber,2,1) OVER (PARTITION BY Area order by SalesDate) AS Lead21_Area_SalesNumber
FROM ComputerSales
--观察First_Value和Last_Value的不同
--如果取同一个同组中最大值最小值对应的某列,使用FIRST_VALUE,按照升降序排列即可
--LAST_VALUE有些像两次分组所求的最后一行
SELECT
Area,SalesDate,SalesNumber,
FIRST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber) AS FirstValue_Area,
FIRST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber DESC) AS FirstValue_Area_Desc,
LAST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber) AS LastValue_Area,
LAST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber DESC) AS LastValue_Area_Desc
FROM ComputerSales
--与上面不同的是,KEEP需要和DENSE_RANK FIRST |DENSE_RANK LAST配合使用,且取的是相同Area中按SalesNumber排序所获得最大或最小的值,而上面只是取第一行或最后一行
SELECT Area,SalesDate,SalesNumber,
DENSE_RANK() OVER(PARTITION BY Area ORDER BY SalesNumber) DENSE_RANK,
MIN(SalesDate) KEEP (DENSE_RANK FIRST ORDER BY SalesNumber) OVER(PARTITION BY Area) min_first,
MIN(SalesDate) KEEP (DENSE_RANK LAST ORDER BY SalesNumber) OVER(PARTITION BY Area) min_last,
MAX(SalesDate) KEEP (DENSE_RANK FIRST ORDER BY SalesNumber) OVER(PARTITION BY Area) max_first,
MAX(SalesDate) KEEP (DENSE_RANK LAST ORDER BY SalesNumber) OVER(PARTITION BY Area) max_last
FROM ComputerSales
--CUME_DIST和PERCENT_RANK差不多,都是累计计算比例,只不过计算基准不同,CUME_DIST更符合一般的做法
--NTILE把数据平分为若干份,更适合用来计算四分位上的值
--RATIO_TO_REPORT,则是求当前值在分区中的比例,且不能与ORDER BY 合起来使用
--PERCENTILE_DISC和PERCENTILE_CONT,则是给定的比例参数所对应的值,一般使用PERCENTILE_DISC即可
SELECT Area,SalesDate,SalesNumber,
ROUND(CUME_DIST() OVER(PARTITION BY Area ORDER BY SalesNumber),2) cume_dist,
ROUND(PERCENT_RANK() OVER(PARTITION BY Area ORDER BY SalesNumber),2) PERCENT_RANK,
ROUND(RATIO_TO_REPORT(SalesNumber) OVER(PARTITION BY Area),2) RATIO_TO_REPORT,
NTILE(4) OVER(PARTITION BY Area ORDER BY SalesNumber) NTILE,
PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY SalesNumber) OVER(PARTITION BY Area) PERCENTILE_DISC,
PERCENTILE_CONT(0.7) WITHIN GROUP (ORDER BY SalesNumber) OVER(PARTITION BY Area) PERCENTILE_CONT
FROM ComputerSales
--增加了一列叫销售额,可以进行相关数理统计
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,
ROUND(SalesNumber*10+5*DBMS_RANDOM.VALUE(1,10)) SalesValue,
TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,
CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'
WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'
WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'
WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'
ELSE '其他地区'
END Area
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
SELECT * FROM ComputerSales;
--其他统计,对数理分析有研究的同学可以尝试一下其经济学含义
SELECT Area,SalesDate,SalesValue,SalesNumber,
REGR_SLOPE(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "斜率",
REGR_INTERCEPT(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "截距",
REGR_R2(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线决定系数",
REGR_AVGX(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线自变量平均值",
REGR_AVGY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线应变量平均值",
VAR_POP(SalesValue) OVER(PARTITION BY Area ORDER BY SalesDate) "VAR_POP_应变量",
VAR_POP(SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "VAR_POP_自变量",
COVAR_POP(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "COVAR_POP",
REGR_SXX(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXX", --REGR_COUNT(expr1, expr2) * VAR_POP(expr2)
REGR_SYY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXY", --REGR_COUNT(expr1, expr2) * VAR_POP(expr1)
REGR_SXY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXY", --REGR_COUNT(expr1, expr2) * COVAR_POP(expr1, expr2)
REGR_COUNT(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_COUNT"
FROM ComputerSales
--关于按日期进行环比的问题
--同比则有麻烦,因为日期天数是不固定的
--从ComputerSales随机删除几行再测
SELECT AREA,SALESDATE,SALESNUMBER,
LAG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lag_error, --如遇断号,会导致数据不准
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 1 PRECEDING AND 1 PRECEDING) yesterday, --昨天的值
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 6 PRECEDING AND 6 PRECEDING) lastweek, --上周数据
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 6 PRECEDING AND 0 PRECEDING) last7_accu, --前7天累计,包括当天
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 29 PRECEDING AND 0 PRECEDING) last30_accu--前30天累计,包括当天
FROM ComputerSales
--再度增加一个product产品列,以方便进行CUBE函数演示
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,
ROUND(SalesNumber*10+5*DBMS_RANDOM.VALUE(1,10)) SalesValue,
TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,
CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'
WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'
WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'
WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'
ELSE '其他地区'
END Area,
CASE WHEN ROUND(DBMS_RANDOM.VALUE(1,3))=1 THEN '产品A'
WHEN ROUND(DBMS_RANDOM.VALUE(1,3))=2 THEN '产品B'
ELSE '产品C'
END Product
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
SELECT * FROM ComputerSales;
--传统的group by语法
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY Product,Area,SalesDate
ORDER BY Product,Area,SalesDate
--ROLLUP (group的字段顺序)
--会自动按Group字段分层统计,与日常报表较为相似
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate --加不加均可,已经自动按分组字段排序
--等价于
SELECT * FROM
(
SELECT Product,Area,SalesDate,SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue --最大级分组
FROM ComputerSales
GROUP BY Product,Area,SalesDate
UNION ALL
SELECT Product,Area,NULL,SUM(SalesNumber),SUM(SalesValue) --按产品、地区分组
FROM ComputerSales
GROUP BY Product,Area,NULL
UNION ALL
SELECT Product,NULL,NULL,SUM(SalesNumber),SUM(SalesValue) --按产品分组
FROM ComputerSales
GROUP BY Product,NULL,NULL
UNION ALL
SELECT NULL,NULL,NULL,SUM(SalesNumber),SUM(SalesValue) --统计总和
FROM ComputerSales
GROUP BY NULL,NULL,NULL
) ORDER BY 1,2,3 --最后再排序
--CUBE (group的字段顺序),与OLAP比较相似,求得所有维度的交汇点
--会自动按Group字段排列组合进行统计
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY CUBE(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate --加不加均可,已经自动按分组字段排序
--两则的区别
--即ROLLUP 为C(3,1)即多了3层
--按照Product,Area,SalesDate;Product,Area;Product;ALL的顺序进行了统计
--CUBE的统计层级则为2的N次方,即全部的有序组合
--按照Product,Area,SalesDate;Product,Area;Product,SalesDate;Product;Area,SalesDate;Area;SalesDate;ALL的顺序进行了统计
--与ROLLUP的等价表达式,相当于ROLLUP的排列组合
SELECT * FROM
(
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue) --先按Product,Area,SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
UNION
SELECT Product,NULL,SalesDate,SUM(SalesNumber),SUM(SalesValue) --再按Product,SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(Product,NULL,SalesDate)
UNION
SELECT NULL,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue) --再按Area,SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(NULL,Area,SalesDate)
UNION
SELECT NULL,NULL,SalesDate,SUM(SalesNumber),SUM(SalesValue) --最后按SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(NULL,NULL,SalesDate)
)
ORDER BY 1,2,3
--GROUPING SETS等同于按三列单独求统计,一般不常用
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY GROUPING SETS(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate ;--加不加均可,已经自动按分组字段排序
--等价于
SELECT * FROM
(
SELECT Product,NULL Area,NULL SalesDate,SUM(SalesNumber),SUM(SalesValue) --按产品分组
FROM ComputerSales
GROUP BY Product,NULL,NULL
UNION ALL
SELECT NULL,Area,NULL,SUM(SalesNumber),SUM(SalesValue) --按地区分组
FROM ComputerSales
GROUP BY NULL,Area,NULL
UNION ALL
SELECT NULL,NULL,SalesDate,SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue --按日期分组
FROM ComputerSales
GROUP BY NULL,NULL,SalesDate
) ORDER BY 1,2,3
--GROUPING函数只接受一个参数,参数为数据表的一列。如果该列为空返回1,否则返回0。
--并且它仅能与 GROUP BY,ROLLUP,CUBE,GROUPING SETS 一起使用。
--稍微运行一下,就发现该函数只是为了做BI报表使用的,把统计行变为1,将来用作字符串替代
SELECT GROUPING(Product), Product,GROUPING(Area),Area,GROUPING(SalesDate),SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate ;
--BI标准报表格式
SELECT
DECODE(ProductFlag,1,'产品汇总',Product),
DECODE(AreaFlag,1,'地区汇总',Area),
DECODE(SalesDateFlag,1,'日期汇总',TO_CHAR(SalesDate,'YYYY-MM-DD')),
SalesNumber,SalesValue
FROM
(
SELECT
GROUPING(Product) ProductFlag, Product,
GROUPING(Area) AreaFlag,Area,
GROUPING(SalesDate) SalesDateFlag,SalesDate,
SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate
)
--GROUPING_ID其实和GROUPING原理差不多,GROUPING参数为单值,且只返回1,1
--GROUPING_ID,则返回按2的指数进行累计得到空值区域的值
SELECT Product,Area,SalesDate,
GROUPING_ID(Product,Area,SalesDate) GROUPING421,
GROUPING_ID(Product,Area) GROUPPING21,
GROUPING_ID(Product) GROUPING1,
SUM(SalesNumber),
SUM(SalesValue)
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate ;--加不加均可,已经自动按分组字段排序
--GROUP_ID函数可以区分重复分组结果,第1 次出现为0,以后每次出现增1。
--GROUP_ID单独答应在SELECT 中出现意义不大,常在HAVING 中使用达到过滤重复统计的目的。
SELECT Product,Area,SalesDate,GROUP_ID(),
SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY CUBE(Product,Area),CUBE(Product,SalesDate)
HAVING GROUP_ID()=0
ORDER BY 1,2,3
--例如该例子中分别按Product,Area和Product,SalesDate会导致产品地区、产品时间的重复计算,导致报表的不清晰
--我们用HAVING GROUP_ID()=0把重复计算的行去掉就OK了
--一般情况下不建议报表程序过度分组,否则到最后连自己都搞糊涂了
--GROUP BY,ROLLUP,CUBE能组合使用,但SELECT中的分组字段必须出现在GROUP BY的相关栏位
--MODEL:MODEL语句的关键字,必须。
--DIMENSION BY:DIMENSION维度的意思,可以理解为数组的索引,必须。
--MEASURES:指定作为数组的列
--RULES:对数组进行各种操作的描述。
--暂时还没搞明白如何应用,只是简单实现了一个求上月、前30天、前7天,前1天的例子
SELECT AREA,PRODUCT,SALESDATE,SALESNUMBER,
AVG30DAY,AVG1MONTH, --最近30天的平均值,最近一个月的平均值
ACCU30DAY,ACCU1MONTH, --最近30天的累加值,最近一个月的累加值
SALESNUMBER1DAY,SALESNUMBER7DAY, --昨天的销售额,一周前的销售额
SALESNUMBER30DAY,SALESNUMBER1MONTH --30天的销售额,上月同天的销售额
FROM ComputerSales
MODEL DIMENSION BY (AREA,PRODUCT,SALESDATE)
MEASURES (SALESNUMBER,0 AVG30DAY,0 AVG1MONTH,0 ACCU30DAY,0 ACCU1MONTH,0 SALESNUMBER1DAY,0 SALESNUMBER7DAY,0 SALESNUMBER30DAY,0 SALESNUMBER1MONTH)
RULES UPDATE
(AVG30DAY[ANY,ANY,ANY]=AVG(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-29 AND CV(SALESDATE)],
AVG1MONTH[ANY,ANY,ANY]=AVG(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN ADD_MONTHS(CV(SALESDATE),-1) AND CV(SALESDATE)],
ACCU30DAY[ANY,ANY,ANY]=SUM(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)],
ACCU1MONTH[ANY,ANY,ANY]=SUM(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN ADD_MONTHS(CV(SALESDATE),-1) AND CV(SALESDATE)],
SALESNUMBER1DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-1 AND CV(SALESDATE)-1],
SALESNUMBER7DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-7 AND CV(SALESDATE)-7],
SALESNUMBER30DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)-30],
SALESNUMBER1MONTH[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)-30]
)
ORDER BY 1,2,3
关于按年月环比统计中可能出现的问题
CREATE TABLE TEST (SALESMONTH VARCHAR(6),SALESNUMBER INT) ;
INSERT INTO TEST VALUES('201002',2);
INSERT INTO TEST VALUES('201004',4);
INSERT INTO TEST VALUES('201007',7);
INSERT INTO TEST VALUES('201008',8);
INSERT INTO TEST VALUES('201010',10);
SELECT SALESMONTH,SALESNUMBER,
LAG(SalesNumber) OVER(order by SalesMONTH) AS Lag10_Area_SalesNumber,
--如遇断号,会导致数据不准
SUM(SalesNumber) OVER(ORDER BY TO_DATE(SalesMONTH||'01','YYYYMMDD') RANGE BETWEEN 1 PRECEDING AND 1 PRECEDING)
FROM TEST
遇到一个问题,假如BI报表中的月份是字符串,而碰巧断月了,如何准确求得上个月的数据,理应为空
如果是天的话可以想办法规避掉,如果是字符串月没想好怎么处理
newkid给了算法
SELECT SALESMONTH,SALESNUMBER,
MAX(SalesNumber) OVER(order by TO_DATE(SalesMONTH,'YYYYMM') RANGE BETWEEN 31 PRECEDING AND 1 PRECEDING )
FROM TEST;
但我觉得结果很正确,但是不保险,而且有点迷糊
是把当前的月份转换成当月的第一天,并且向前推31天到前1天
假如当前月是2月,向前推31天应该到去年12月份了,求的 MAX(SalesNumber) 未必有效
可实际结果是正确的,奇怪
关于Model的用法,实在读不下去
Oracle分析函数、多维函数和Model函数简要说明,主要针对BI报表统计,不一定很全面,但对BI应用场景做了少许说明
--创建一张销售数量表,数据趋势是递增的
CREATE TABLE ComputerSales AS
SELECT
120+TRUNC(rn/12)+ROUND(DBMS_RANDOM.VALUE(1,10)) SalesNumber
FROM
(
SELECT level,ROWNUM rn
FROM DUAL
CONNECT BY ROWNUM<=120
);
--下面用于比较NULL值和非NULL值的统计,可以看出NULL值情况下的COUNT是存在问题的,所以建议数据库系统中最好不要使用NULL值列
SELECT
COUNT(*),
COUNT(a.SalesNumber),
COUNT(DISTINCT a.SalesNumber),
SUM(a.SalesNumber),
AVG(a.SalesNumber),
MAX(a.SalesNumber),
MIN(a.SalesNumber)
FROM ComputerSales A;
DELETE FROM ComputerSales WHERE SalesNumber IS NULL;
COMMIT;
INSERT INTO ComputerSales VALUES(NULL);
COMMIT;
INSERT INTO ComputerSales VALUES(NULL);
COMMIT;
SELECT
COUNT(*),
COUNT(a.SalesNumber),
COUNT(DISTINCT a.SalesNumber),
SUM(a.SalesNumber),
AVG(a.SalesNumber),
MAX(a.SalesNumber),
MIN(a.SalesNumber)
FROM ComputerSales A;
SELECT trunc(dbms_random.value(1,101)),
DELETE FROM ComputerSales WHERE SalesNumber IS NULL;
COMMIT;
--创建增加了日期字段的表
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,TRUNC(SYSDATE)+MOD(A.DateSEQ-1,10) SalesDate
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
--下面是两种创建方式,构招Area列和日期列
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,
CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'
WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'
WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'
WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'
ELSE '其他地区'
END
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
--该例可构造SalesDate和Area的重复数据
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,
TRUNC(SYSDATE)+MOD(A.DateSEQ-1,10) SalesDate,
CASE WHEN AreaSEQ=1 THEN '华南地区'
WHEN AreaSEQ=2 THEN '华北地区'
WHEN AreaSEQ=3 THEN '东北地区'
WHEN AreaSEQ=4 THEN '华东地区'
ELSE '其他地区'
END
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ,ROUND(dbms_random.VALUE(1,5)) AreaSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
--移动平均值,累计求和,当前窗口平均值,当前窗口求和,以及窗口函数和排序函数的作用域
SELECT
Area,SalesDate,SalesNumber,
MIN(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS min_Area_SalesDate,
MAX(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS max_Area_SalesDate,
AVG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS avg_Area_SalesDate,
SUM(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS sum_Area_SalesDate,
COUNT(*) OVER (PARTITION BY Area ORDER BY SalesDate) AS count_Area,
MIN(SalesNumber) OVER (PARTITION BY Area) AS min_Area,
MAX(SalesNumber) OVER (PARTITION BY Area) AS max_Area,
AVG(SalesNumber) OVER (PARTITION BY Area) AS avg_Area,
SUM(SalesNumber) OVER (PARTITION BY Area) AS sum_Area,
COUNT(*) OVER (PARTITION BY Area) AS count_Area
FROM ComputerSales
--观察Rank、Dense_Rank,Row_number,Count的区别
--Rank跳号,Dense_Rank不跳号,Row_number唯一,Count按统计数计也跳号
--如果PARTITION BY和order by 的字段是唯一的话,则这四个函数没什么区别
SELECT
Area,SalesDate,SalesNumber,
RANK() OVER (PARTITION BY Area order by SalesNumber) AS Rank_Area_SalesNumber,
DENSE_RANK() OVER (PARTITION BY Area order by SalesNumber) AS DenseRank_Area_SalesNumber,
ROW_NUMBER() OVER (PARTITION BY Area order by SalesNumber) AS Rownumber_Area_SalesNumber,
COUNT(*) OVER (PARTITION BY Area order by SalesNumber) AS CountAll_Area_SalesNumber,
COUNT(SalesNumber) OVER (PARTITION BY Area order by SalesNumber) AS Count_Area_SalesNumber
FROM ComputerSales
--观察Lag和Lead的异同,以及Lag参数之间的异同
--缺省情况下Lag取前一行的值,Lead取后一行的值
--Lag、lead的第一个参数决定了取行的位置,第二个参数为取不到值时的缺省值
SELECT
Area,SalesDate,SalesNumber,
LAG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lag_Area_SalesNumber,
LEAD(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lead_Area_SalesNumber,
LAG(SalesNumber,1) OVER (PARTITION BY Area order by SalesDate) AS Lag1_Area_SalesNumber,
LAG(SalesNumber,2) OVER (PARTITION BY Area order by SalesDate) AS Lag2_Area_SalesNumber,
LEAD(SalesNumber,1) OVER (PARTITION BY Area order by SalesDate) AS Lead1_Area_SalesNumber,
LEAD(SalesNumber,2) OVER (PARTITION BY Area order by SalesDate) AS Lead2_Area_SalesNumber,
LAG(SalesNumber,1,0) OVER (PARTITION BY Area order by SalesDate) AS Lag10_Area_SalesNumber,
LAG(SalesNumber,2,1) OVER (PARTITION BY Area order by SalesDate) AS Lag21_Area_SalesNumber,
LEAD(SalesNumber,1,0) OVER (PARTITION BY Area order by SalesDate) AS Lead10_Area_SalesNumber,
LEAD(SalesNumber,2,1) OVER (PARTITION BY Area order by SalesDate) AS Lead21_Area_SalesNumber
FROM ComputerSales
--观察First_Value和Last_Value的不同
--如果取同一个同组中最大值最小值对应的某列,使用FIRST_VALUE,按照升降序排列即可
--LAST_VALUE有些像两次分组所求的最后一行
SELECT
Area,SalesDate,SalesNumber,
FIRST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber) AS FirstValue_Area,
FIRST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber DESC) AS FirstValue_Area_Desc,
LAST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber) AS LastValue_Area,
LAST_VALUE(SalesDate) OVER (PARTITION BY Area order by SalesNumber DESC) AS LastValue_Area_Desc
FROM ComputerSales
--与上面不同的是,KEEP需要和DENSE_RANK FIRST |DENSE_RANK LAST配合使用,且取的是相同Area中按SalesNumber排序所获得最大或最小的值,而上面只是取第一行或最后一行
SELECT Area,SalesDate,SalesNumber,
DENSE_RANK() OVER(PARTITION BY Area ORDER BY SalesNumber) DENSE_RANK,
MIN(SalesDate) KEEP (DENSE_RANK FIRST ORDER BY SalesNumber) OVER(PARTITION BY Area) min_first,
MIN(SalesDate) KEEP (DENSE_RANK LAST ORDER BY SalesNumber) OVER(PARTITION BY Area) min_last,
MAX(SalesDate) KEEP (DENSE_RANK FIRST ORDER BY SalesNumber) OVER(PARTITION BY Area) max_first,
MAX(SalesDate) KEEP (DENSE_RANK LAST ORDER BY SalesNumber) OVER(PARTITION BY Area) max_last
FROM ComputerSales
--CUME_DIST和PERCENT_RANK差不多,都是累计计算比例,只不过计算基准不同,CUME_DIST更符合一般的做法
--NTILE把数据平分为若干份,更适合用来计算四分位上的值
--RATIO_TO_REPORT,则是求当前值在分区中的比例,且不能与ORDER BY 合起来使用
--PERCENTILE_DISC和PERCENTILE_CONT,则是给定的比例参数所对应的值,一般使用PERCENTILE_DISC即可
SELECT Area,SalesDate,SalesNumber,
ROUND(CUME_DIST() OVER(PARTITION BY Area ORDER BY SalesNumber),2) cume_dist,
ROUND(PERCENT_RANK() OVER(PARTITION BY Area ORDER BY SalesNumber),2) PERCENT_RANK,
ROUND(RATIO_TO_REPORT(SalesNumber) OVER(PARTITION BY Area),2) RATIO_TO_REPORT,
NTILE(4) OVER(PARTITION BY Area ORDER BY SalesNumber) NTILE,
PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY SalesNumber) OVER(PARTITION BY Area) PERCENTILE_DISC,
PERCENTILE_CONT(0.7) WITHIN GROUP (ORDER BY SalesNumber) OVER(PARTITION BY Area) PERCENTILE_CONT
FROM ComputerSales
--增加了一列叫销售额,可以进行相关数理统计
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,
ROUND(SalesNumber*10+5*DBMS_RANDOM.VALUE(1,10)) SalesValue,
TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,
CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'
WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'
WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'
WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'
ELSE '其他地区'
END Area
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
SELECT * FROM ComputerSales;
--其他统计,对数理分析有研究的同学可以尝试一下其经济学含义
SELECT Area,SalesDate,SalesValue,SalesNumber,
REGR_SLOPE(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "斜率",
REGR_INTERCEPT(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "截距",
REGR_R2(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线决定系数",
REGR_AVGX(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线自变量平均值",
REGR_AVGY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "回归线应变量平均值",
VAR_POP(SalesValue) OVER(PARTITION BY Area ORDER BY SalesDate) "VAR_POP_应变量",
VAR_POP(SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "VAR_POP_自变量",
COVAR_POP(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "COVAR_POP",
REGR_SXX(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXX", --REGR_COUNT(expr1, expr2) * VAR_POP(expr2)
REGR_SYY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXY", --REGR_COUNT(expr1, expr2) * VAR_POP(expr1)
REGR_SXY(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_SXY", --REGR_COUNT(expr1, expr2) * COVAR_POP(expr1, expr2)
REGR_COUNT(SalesValue,SalesNumber) OVER(PARTITION BY Area ORDER BY SalesDate) "REGR_COUNT"
FROM ComputerSales
--关于按日期进行环比的问题
--同比则有麻烦,因为日期天数是不固定的
--从ComputerSales随机删除几行再测
SELECT AREA,SALESDATE,SALESNUMBER,
LAG(SalesNumber) OVER (PARTITION BY Area order by SalesDate) AS Lag_error, --如遇断号,会导致数据不准
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 1 PRECEDING AND 1 PRECEDING) yesterday, --昨天的值
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 6 PRECEDING AND 6 PRECEDING) lastweek, --上周数据
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 6 PRECEDING AND 0 PRECEDING) last7_accu, --前7天累计,包括当天
SUM(SalesNumber) OVER (PARTITION BY AREA ORDER BY SALESDATE RANGE BETWEEN 29 PRECEDING AND 0 PRECEDING) last30_accu--前30天累计,包括当天
FROM ComputerSales
--再度增加一个product产品列,以方便进行CUBE函数演示
CREATE TABLE ComputerSalesBAK AS
SELECT SalesNumber,
ROUND(SalesNumber*10+5*DBMS_RANDOM.VALUE(1,10)) SalesValue,
TRUNC(SYSDATE)+MOD(A.DateSEQ-1,24) SalesDate,
CASE WHEN TRUNC((DateSEQ-1)/24)=1 THEN '华南地区'
WHEN TRUNC((DateSEQ-1)/24)=2 THEN '华北地区'
WHEN TRUNC((DateSEQ-1)/24)=3 THEN '东北地区'
WHEN TRUNC((DateSEQ-1)/24)=4 THEN '华东地区'
ELSE '其他地区'
END Area,
CASE WHEN ROUND(DBMS_RANDOM.VALUE(1,3))=1 THEN '产品A'
WHEN ROUND(DBMS_RANDOM.VALUE(1,3))=2 THEN '产品B'
ELSE '产品C'
END Product
FROM (SELECT SalesNumber,ROW_NUMBER() OVER(ORDER BY ROWID) DateSEQ FROM ComputerSales) A;
DROP TABLE ComputerSales;
RENAME ComputerSalesBAK TO ComputerSales;
SELECT * FROM ComputerSales;
--传统的group by语法
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY Product,Area,SalesDate
ORDER BY Product,Area,SalesDate
--ROLLUP (group的字段顺序)
--会自动按Group字段分层统计,与日常报表较为相似
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate --加不加均可,已经自动按分组字段排序
--等价于
SELECT * FROM
(
SELECT Product,Area,SalesDate,SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue --最大级分组
FROM ComputerSales
GROUP BY Product,Area,SalesDate
UNION ALL
SELECT Product,Area,NULL,SUM(SalesNumber),SUM(SalesValue) --按产品、地区分组
FROM ComputerSales
GROUP BY Product,Area,NULL
UNION ALL
SELECT Product,NULL,NULL,SUM(SalesNumber),SUM(SalesValue) --按产品分组
FROM ComputerSales
GROUP BY Product,NULL,NULL
UNION ALL
SELECT NULL,NULL,NULL,SUM(SalesNumber),SUM(SalesValue) --统计总和
FROM ComputerSales
GROUP BY NULL,NULL,NULL
) ORDER BY 1,2,3 --最后再排序
--CUBE (group的字段顺序),与OLAP比较相似,求得所有维度的交汇点
--会自动按Group字段排列组合进行统计
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY CUBE(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate --加不加均可,已经自动按分组字段排序
--两则的区别
--即ROLLUP 为C(3,1)即多了3层
--按照Product,Area,SalesDate;Product,Area;Product;ALL的顺序进行了统计
--CUBE的统计层级则为2的N次方,即全部的有序组合
--按照Product,Area,SalesDate;Product,Area;Product,SalesDate;Product;Area,SalesDate;Area;SalesDate;ALL的顺序进行了统计
--与ROLLUP的等价表达式,相当于ROLLUP的排列组合
SELECT * FROM
(
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue) --先按Product,Area,SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
UNION
SELECT Product,NULL,SalesDate,SUM(SalesNumber),SUM(SalesValue) --再按Product,SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(Product,NULL,SalesDate)
UNION
SELECT NULL,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue) --再按Area,SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(NULL,Area,SalesDate)
UNION
SELECT NULL,NULL,SalesDate,SUM(SalesNumber),SUM(SalesValue) --最后按SalesDate求ROLLUP
FROM ComputerSales
GROUP BY ROLLUP(NULL,NULL,SalesDate)
)
ORDER BY 1,2,3
--GROUPING SETS等同于按三列单独求统计,一般不常用
SELECT Product,Area,SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY GROUPING SETS(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate ;--加不加均可,已经自动按分组字段排序
--等价于
SELECT * FROM
(
SELECT Product,NULL Area,NULL SalesDate,SUM(SalesNumber),SUM(SalesValue) --按产品分组
FROM ComputerSales
GROUP BY Product,NULL,NULL
UNION ALL
SELECT NULL,Area,NULL,SUM(SalesNumber),SUM(SalesValue) --按地区分组
FROM ComputerSales
GROUP BY NULL,Area,NULL
UNION ALL
SELECT NULL,NULL,SalesDate,SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue --按日期分组
FROM ComputerSales
GROUP BY NULL,NULL,SalesDate
) ORDER BY 1,2,3
--GROUPING函数只接受一个参数,参数为数据表的一列。如果该列为空返回1,否则返回0。
--并且它仅能与 GROUP BY,ROLLUP,CUBE,GROUPING SETS 一起使用。
--稍微运行一下,就发现该函数只是为了做BI报表使用的,把统计行变为1,将来用作字符串替代
SELECT GROUPING(Product), Product,GROUPING(Area),Area,GROUPING(SalesDate),SalesDate,SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate ;
--BI标准报表格式
SELECT
DECODE(ProductFlag,1,'产品汇总',Product),
DECODE(AreaFlag,1,'地区汇总',Area),
DECODE(SalesDateFlag,1,'日期汇总',TO_CHAR(SalesDate,'YYYY-MM-DD')),
SalesNumber,SalesValue
FROM
(
SELECT
GROUPING(Product) ProductFlag, Product,
GROUPING(Area) AreaFlag,Area,
GROUPING(SalesDate) SalesDateFlag,SalesDate,
SUM(SalesNumber) SalesNumber,SUM(SalesValue) SalesValue
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate
)
--GROUPING_ID其实和GROUPING原理差不多,GROUPING参数为单值,且只返回1,1
--GROUPING_ID,则返回按2的指数进行累计得到空值区域的值
SELECT Product,Area,SalesDate,
GROUPING_ID(Product,Area,SalesDate) GROUPING421,
GROUPING_ID(Product,Area) GROUPPING21,
GROUPING_ID(Product) GROUPING1,
SUM(SalesNumber),
SUM(SalesValue)
FROM ComputerSales
GROUP BY ROLLUP(Product,Area,SalesDate)
ORDER BY Product,Area,SalesDate ;--加不加均可,已经自动按分组字段排序
--GROUP_ID函数可以区分重复分组结果,第1 次出现为0,以后每次出现增1。
--GROUP_ID单独答应在SELECT 中出现意义不大,常在HAVING 中使用达到过滤重复统计的目的。
SELECT Product,Area,SalesDate,GROUP_ID(),
SUM(SalesNumber),SUM(SalesValue)
FROM ComputerSales
GROUP BY CUBE(Product,Area),CUBE(Product,SalesDate)
HAVING GROUP_ID()=0
ORDER BY 1,2,3
--例如该例子中分别按Product,Area和Product,SalesDate会导致产品地区、产品时间的重复计算,导致报表的不清晰
--我们用HAVING GROUP_ID()=0把重复计算的行去掉就OK了
--一般情况下不建议报表程序过度分组,否则到最后连自己都搞糊涂了
--GROUP BY,ROLLUP,CUBE能组合使用,但SELECT中的分组字段必须出现在GROUP BY的相关栏位
--MODEL:MODEL语句的关键字,必须。
--DIMENSION BY:DIMENSION维度的意思,可以理解为数组的索引,必须。
--MEASURES:指定作为数组的列
--RULES:对数组进行各种操作的描述。
--暂时还没搞明白如何应用,只是简单实现了一个求上月、前30天、前7天,前1天的例子
SELECT AREA,PRODUCT,SALESDATE,SALESNUMBER,
AVG30DAY,AVG1MONTH, --最近30天的平均值,最近一个月的平均值
ACCU30DAY,ACCU1MONTH, --最近30天的累加值,最近一个月的累加值
SALESNUMBER1DAY,SALESNUMBER7DAY, --昨天的销售额,一周前的销售额
SALESNUMBER30DAY,SALESNUMBER1MONTH --30天的销售额,上月同天的销售额
FROM ComputerSales
MODEL DIMENSION BY (AREA,PRODUCT,SALESDATE)
MEASURES (SALESNUMBER,0 AVG30DAY,0 AVG1MONTH,0 ACCU30DAY,0 ACCU1MONTH,0 SALESNUMBER1DAY,0 SALESNUMBER7DAY,0 SALESNUMBER30DAY,0 SALESNUMBER1MONTH)
RULES UPDATE
(AVG30DAY[ANY,ANY,ANY]=AVG(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-29 AND CV(SALESDATE)],
AVG1MONTH[ANY,ANY,ANY]=AVG(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN ADD_MONTHS(CV(SALESDATE),-1) AND CV(SALESDATE)],
ACCU30DAY[ANY,ANY,ANY]=SUM(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)],
ACCU1MONTH[ANY,ANY,ANY]=SUM(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN ADD_MONTHS(CV(SALESDATE),-1) AND CV(SALESDATE)],
SALESNUMBER1DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-1 AND CV(SALESDATE)-1],
SALESNUMBER7DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-7 AND CV(SALESDATE)-7],
SALESNUMBER30DAY[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)-30],
SALESNUMBER1MONTH[ANY,ANY,ANY]=MAX(SALESNUMBER)[CV(),CV(),SALESDATE BETWEEN CV(SALESDATE)-30 AND CV(SALESDATE)-30]
)
ORDER BY 1,2,3
关于按年月环比统计中可能出现的问题
CREATE TABLE TEST (SALESMONTH VARCHAR(6),SALESNUMBER INT) ;
INSERT INTO TEST VALUES('201002',2);
INSERT INTO TEST VALUES('201004',4);
INSERT INTO TEST VALUES('201007',7);
INSERT INTO TEST VALUES('201008',8);
INSERT INTO TEST VALUES('201010',10);
SELECT SALESMONTH,SALESNUMBER,
LAG(SalesNumber) OVER(order by SalesMONTH) AS Lag10_Area_SalesNumber,
--如遇断号,会导致数据不准
SUM(SalesNumber) OVER(ORDER BY TO_DATE(SalesMONTH||'01','YYYYMMDD') RANGE BETWEEN 1 PRECEDING AND 1 PRECEDING)
FROM TEST
遇到一个问题,假如BI报表中的月份是字符串,而碰巧断月了,如何准确求得上个月的数据,理应为空
如果是天的话可以想办法规避掉,如果是字符串月没想好怎么处理
newkid给了算法
SELECT SALESMONTH,SALESNUMBER,
MAX(SalesNumber) OVER(order by TO_DATE(SalesMONTH,'YYYYMM') RANGE BETWEEN 31 PRECEDING AND 1 PRECEDING )
FROM TEST;
但我觉得结果很正确,但是不保险,而且有点迷糊
是把当前的月份转换成当月的第一天,并且向前推31天到前1天
假如当前月是2月,向前推31天应该到去年12月份了,求的 MAX(SalesNumber) 未必有效
可实际结果是正确的,奇怪
关于Model的用法,实在读不下去
http://download.oracle.com/docs/cd/B19306_01/server.102/b14223/sqlmodel.htm
本文转自baoqiangwang51CTO博客,原文链接:http://blog.51cto.com/baoqiangwang/582012,如需转载请自行联系原作者