zoukankan      html  css  js  c++  java
  • 聚合函数

    聚合函数就是基于多行数据返回一行结果,下面就是Oracle提供的一些列聚合函数:

    AVG

    COLLECT

    CORR

    CORR_*

    COUNT

    COVAR_POP

    COVAR_SAMP

    CUME_DIST

    DENSE_RANK

    FIRST

    GROUP_ID

    GROUPING

    GROUPING_ID

    LAST

    LISTAGG

    MAX

    MEDIAN

    MIN

    PERCENT_RANK

    PERCENTILE_CONT

    PERCENTILE_DISC

    RANK

    REGR_(Linear Regression) Functions

    STATS_BINOMIAL_TEST

    STATS_CROSSTAB

    STATS_F_TEST

    STATS_KS_TEST

    STATS_MODE

    STATS_MW_TEST

    STATS_ONE_WAY_ANOVA

    STATS_T_TEST_*

    STATS_WSR_TEST

    STDDEV

    STDDEV_POP

    STDDEV_SAMP

    SUM

    SYS_XMLAGG

    VAR_ POP

    VAR_ SAMP

    VARI ANCE

    XMLAGG

    1、AVG( distinct|all,expr,over(analytic_clause) ) 求平均值

    SELECT deptno,AVG(sal) "Average"

    From emp

    Group By Deptno

    order by deptno;

    Select Mgr, Ename, Hiredate, Sal,

    AVG(sal) OVER (PARTITION BY mgr ORDER BYhiredate ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS c_mavg

    From Emp

    ORDER BY Mgr, Hiredate, Sal;

    2、COLLECT(distinct|unique,column,orderby expr)

    将结果转换为一个嵌套表

    CREATE TYPE phone_book_t AS TABLE OFphone_list_typ;

    /

    SELECT CAST(COLLECT(phone_numbers) ASphone_book_t) Phone_Book

    FROM customers

    ORDER BY phone_book;

    3、CORR( expr1,expr2,over(analytic_clause) ) 返回一对表达式的相关系数[返回一个-1~1的数,相关系数给出了关联的强度,0表示不相关]

    Select Deptno,CORR(empno,mgr) a1

    From Emp

    Group By Deptno;

    SELECT empno, job,

    TO_CHAR((SYSDATE - hiredate) YEAR TO MONTH )"Yrs-Mns", sal,

    CORR(SYSDATE-hiredate, sal)

    Over(Partition By Job) As"Correlation"

    FROM emp

    4、CORR_K(expr1,expr2,

    COEFFICIENT|ONE_SIDED_SIG|ONE_SIDED_SIG_POS|ONE_SIDED_SIG_NEG|TWO_SIDED_SIG));

    CORR_S(expr1,expr2,

    COEFFICIENT|ONE_SIDED_SIG|ONE_SIDED_SIG_POS|ONE_SIDED_SIG_NEG|TWO_SIDED_SIG));

    COEFFICIENT:相关系数

    ONE_SIDED_SIG :Positive one-tailed significance of the correlation

    ONE_SIDED_SIG_POS :Same as ONE_SIDED_SIG

    ONE_SIDED_SIG_NEG :Negative one-tailed significance of the correlation

    TWO_SIDED_SIG :Two-tailed significanceof the correlation

    Select Count(*) Count,

    CORR_S(sal, mgr) commission,

    CORR_S(sal, Empno) empid

    FROM emp;

    Select Corr_K(Sal, mgr, 'COEFFICIENT')Coefficient,

    Corr_K(Sal, mgr, 'TWO_SIDED_SIG')Two_Sided_P_Value

    FROM emp;

    5、Count(distinct|all expr) OVER(analytic_clause)

    select count(*) a1 from emp;

    Select Ename, Sal,

    COUNT(*) OVER (ORDER BY sal RANGE BETWEEN 50PRECEDING AND 150 FOLLOWING) AS mov_count

    From Emp

    ORDER BY sal,ename;

    6、COVAR_POP(expr1,expr2)OVER(analytic_clause) 返回一对表达式的协方差[(SUM(expr1 * expr2) - SUM(expr2) * SUM(expr1) / n) / n]

    7、COVAR_SAMP(expr1,expr2)OVER(analytic_clause) 返回一对表达式的样本方差[(SUM(expr1 * expr2) - SUM(expr1) * SUM(expr2) / n) / (n-1)]

    SELECT job,

    COVAR_POP(SYSDATE-hiredate, sal) AS covar_pop,

    COVAR_SAMP(SYSDATE-hiredate, sal) AScovar_samp

    FROM emp

    WHERE 1=1

    Group By job

    ORDER BY job, covar_pop, covar_samp;

    8、CUME_DIST(expr)WITHIN GROUP (ORDER BY expr  DESC|ASC  NULLS  FIRST|LAST)

    CUME_DIST() OVER (query_partition_clause order_by_clause)

    求累计分布,结果范围(0,1]

    Select Cume_Dist(15500, 0.05) Within Group

    (Order By Sal, mgr) "Cume-Dist of15500"

    From Emp;

    SELECT job, ename, sal, CUME_DIST()

    Over (Partition By Job Order By Sal) AsCume_Dist

    FROM emp

    Where 1=1

    ORDER BY job, ename, sal, cume_dist;

    9、DENSE_RANK(expr)WITHIN GROUP (ORDER BY expr DESC|ASC NULLS FIRST|LAST)

    DENSE_RANK() OVER (query_partition_clause order_by_clause)

    结果集排序(如果有两个第二名,那么下一个还是第三名)

    SELECT DENSE_RANK(15500, 0.05) WITHIN GROUP

    (Order By Sal Desc, mgr) "DenseRank"

    From Emp;

    Select tmp.Dname,tmp.Ename,tmp.Sal,tmp.Drank

    From

    (

      SelectD.Dname, E.Ename, E.Sal, Dense_Rank()

      Over(Partition By E.deptno Order By E.Sal) As Drank

      FromEmp E, Dept D

      WhereE.Deptno = D.Deptno

    )Tmp

    where 1=1

    10、RANK(expr) WITHINGROUP (ORDER BY expr DESC|ASC NULLS FIRST|LAST)

    RANK() OVER (query_partition_clause order_by_clause)

    结果集排序(如果有两个第二名,那么下一个还是第四名)

    SELECT RANK(15500) WITHIN GROUP (Order By SalDesc) "Rank of 15500"

    FROM emp;

    Select Deptno, Ename, Sal, Mgr,

    RANK() OVER (PARTITION BY deptno ORDER BY salDESC, mgr) "Rank"

    From emp

    Where 1=1

    ORDER BY deptno, ename, sal, mgr, "Rank";

    11、First

    aggregate_function KEEP

    (DENSE_RANK FIRST ORDER BY expr DESC|ASCNULLS FIRST|LAST)

    OVER (query_partition_clause)

    12、LAST

    aggregate_function KEEP

    (DENSE_RANK LAST ORDER BY expr DESC|ASCNULLS FIRST|LAST)

    OVER (query_partition_clause)

    Select Deptno,

    MIN(sal) KEEP (DENSE_RANK FIRST ORDER BYempno) "Worst",

    MAX(sal) KEEP (DENSE_RANK LAST ORDER BY empno)"Best"

    From Emp

    Group By Deptno

    ORDER BY deptno;

    Select Deptno,Ename, Sal,

    Min(Sal) Keep (Dense_Rank First Order Byename) Over (Partition By deptno) "Worst",

    MAX(sal) KEEP (DENSE_RANK LAST ORDER BY ename)OVER (PARTITION BY deptno) "Best"

    From Emp

    ORDER BY deptno, sal, ename;

    13、GROUP_ID() 用于消除GROUP BY子句返回的重复记录。GROUP_ID()不接受任何参数。如果某个特定的分组重复出现n次,那么GROUP_ID()返回从0到n-1之间的一个整数,可以通过 Having GROUP_ID()=0 来消除重复

    14、GROUPING(expr) 用于区分常规行与合计(总计)行

    GROUPING(expr) 常与 GROUP BY ROLLUP(expr)、GROUP BY CUBE(expr)一起使用

    Select deptno,count(empno)a1,GROUP_ID()a2,GROUPING(deptno) a3

    From Emp

    Group By rollup(deptno)

    order by deptno

    15、GROUPING_ID(expr) 返回GROUPING位向量的十进制值。GROUPING位向量的计算方法是将按照顺序对每一列调用GROUPING函数的结果组合起来

    Select deptno,job,count(empno)a1,GROUP_ID()a2,GROUPING(deptno) a3,GROUPING_ID(deptno,job) a4

    From Emp

    Group By Rollup(Deptno,Job)

    order by deptno,job;

    16、LISTAGG(measure_expr, delimiter) WITHIN GROUP (order_by_clause)

    OVER (query_partition_clause)字符串聚合(拼接),这个函数在11G R2中新增,运用很广泛,以前还需要自己写一个拼接的函数

    SELECT LISTAGG(ename, '; ') WITHIN GROUP(ORDER BY hiredate, empno) "Emp_list",

    MIN(hiredate) "Earliest"

    FROM emp

    WHERE 1=1;

    Select Deptno,

    Listagg(ename, '; ') Within Group (Order ByHireDate) "Employees"

    FROM emp

    Group By deptno

    ORDER BY deptno;

    在oracle 10G中 可以考虑: WMSYS.WM_CONCAT

    17、MAX (DISTINCT|ALL expr) OVER ( analytic_clause ) 返回最大值

    18、MIN (DISTINCT|ALL expr) OVER ( analytic_clause ) 返回最小值

    Select Deptno,Max(Sal),MIN(sal)

    From Emp

    group by deptno;

    Select Mgr, Ename, Sal,

    Max(Sal) Over (Partition By Mgr) As Mgr_Max,

    MIN(sal) OVER (PARTITION BY mgr) AS mgr_min

    From Emp

    ORDER BY mgr, ename, sal;

    19、MEDIAN ( expr ) OVER( query_partition_clause ) 返回一组数据的中间值

    例如:{1,2,3,4,5} 则返回3,{1,2,3,4}则返回(2+3)/2

    with tab as(

     Select1 Num From Dual Union All

     select3 from dual union all

     select20 from dual union all  

     select12 from dual

     )

    Select Median(Num) a1 From tab;

    SELECT deptno, MEDIAN(sal)

    From Emp

    Group By Deptno

    ORDER BY deptno; 

    20、PERCENT_RANK ( expr)WITHIN GROUP ( ORDER BY expr DESC|ASC NULLS FIRST|LAST)

    PERCENT_RANK ( ) OVER (query_partition_clauseorder_by_clause )

    和CUME_DIST(累积分配)函数类似,对于一个组中给定的行来说,在计算那行的序号时,先减1,然后除以n-1(n为组中所有的行数)。该函数总是返回0~1(包括1)之间的数

    SELECT PERCENT_RANK(15000,0.05) WITHIN GROUP(ORDER BY sal,mgr) "Percent-Rank"

    From emp;

    Select Deptno,Ename,Sal,Percent_Rank()Over(Partition By Deptno Order By Sal) Pr

    From Emp

    order by deptno,sal;

    21、PERCENTILE_CONT (expr ) WITHIN GROUP ( ORDER BY expr DESC|ASC)

    OVER ( query_partition_clause )

    返回一个与输入的分布百分比值相对应的数据值,分布百分比的计算方法同函数PERCENT_RANK(),
    如果没有正好对应的数据值,就通过下面算法来得到值:
    RN = 1+ (P*(N-1)) 其中P是输入的分布百分比值,N是组内的行数
    CRN = CEIL(RN) FRN= FLOOR(RN)
      If (CRN =FRN = RN) then the result is
       (value of expression from row at RN)
      Otherwisethe result is
       (CRN - RN) * (value of expression for row at FRN) +
       (RN - FRN) * (value of expression for row at CRN)

    SELECT deptno,

    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BYsal DESC) "Median cont",

    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BYsal DESC) "Median disc"

    FROM emp

    Group By deptno

    ORDER BY deptno;

    SELECT ename, sal, deptno,

    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BYsal DESC) OVER (PARTITION BY deptno) "Percentile_Cont",

    PERCENT_RANK() OVER (PARTITION BY deptno ORDERBY sal DESC) "Percent_Rank"

    FROM emp

    ORDER BY ename, sal, deptno;

    22、PERCENTILE_DISC (expr ) WITHIN GROUP ( ORDER BY expr DESC|ASC)

    OVER ( query_partition_clause )

     percentile_disc函数在功能上类似于percentile_cont函数,只是percentile_cont函数使用了连续分布模型,而percentile_disc函数使用了离期分布模型。当没有值与指定的percent_rank精确匹配的时候,percentile_cont(0.5)会计算两个离得最近的值的平均值。相反,在升序排列的情况下,percentile_disc函数只取比所传递的参数percent_rank值更大的值。在降序排列的时候,percentile_disc函数只取比所传递的参数percent_rank值更小的值。

    Select Ename, Sal, Deptno,

    Percentile_Disc(0.5) Within Group (Order BySal Desc) Over (Partition By Deptno) "Percentile_Disc",

    CUME_DIST() OVER (PARTITION BY deptno ORDER BYsal DESC) "Cume_Dist"

    FROM emp

    ORDER BY ename, sal, deptno

    23、REGR_ (线性回归) 函数

    The linear regression functions are:

    ■ REGR_SLOPE

    ■ REGR_INTERCEPT

    ■ REGR_COUNT

    ■ REGR_R2

    ■ REGR_AVGX

    ■ REGR_AVGY

    ■ REGR_SXX

    ■ REGR_SYY

    ■ REGR_SXY

    语法:

    fun_name(expr1,expr2) OVER(analytic_clause)

    SELECT job, empno ID, sal,

    REGR_SLOPE(SYSDATE-hiredate, sal)

    OVER (PARTITION BY job) slope,

    REGR_INTERCEPT(SYSDATE-hiredate, sal)

    Over (Partition By Job) Intcpt,

    REGR_R2(SYSDATE-hiredate, sal)

    OVER (PARTITION BY job) rsqr,

    REGR_COUNT(SYSDATE-hiredate, sal)

    OVER (PARTITION BY job) count,

    REGR_AVGX(SYSDATE-hiredate, sal)

    OVER (PARTITION BY job) avgx,

    REGR_AVGY(SYSDATE-hiredate, sal)

    OVER (PARTITION BY job) avgy

    From Emp

    ORDER BY job, empno;

    SELECT job,

    Regr_Slope(Sysdate-Hiredate, Sal) Slope,

    Regr_Intercept(Sysdate-Hiredate, Sal)Intercept

    FROM emp

    Group By Job

    ORDER BY job;

    SELECT job,

    REGR_COUNT(SYSDATE-hiredate, sal) count

    From Emp

    Group By Job

    ORDER BY job, count;

    SELECT job,

    Regr_R2(Sysdate-Hiredate, Sal) Regr_R2

    FROM emp

    Group By Job

    ORDER BY job, Regr_R2;

    SELECT job,

    REGR_AVGY(SYSDATE-hiredate, sal) avgy,

    Regr_Avgx(Sysdate-Hiredate, Sal) Avgx

    FROM emp

    Group By Job

    ORDER BY job, avgy, avgx

    SELECT job,

    REGR_SXY(SYSDATE-hiredate, sal) regr_sxy,

    REGR_SXX(SYSDATE-hiredate, sal) regr_sxx,

    Regr_Syy(Sysdate-Hiredate, Sal) Regr_Syy

    FROM emp

    Group By Job

    ORDER BY job;

    24、STATS_BINOMIAL_TEST( expr1 , expr2 , p,

    TWO_SIDED_PROB|EXACT_PROB|ONE_SIDED_PROB_OR_MORE|ONE_SIDED_PROB_OR_LESS)

    统计二项测试是一个精确概率测试用于二分变量,那里只有两个可能值存在。它测试一个样品之间的差异比例和给定的比例

    25、STATS_CROSSTAB (expr1 , expr2,

    CHISQ_OBS|CHISQ_SIG|CHISQ_DF|PHI_COEFFICIENT|CRAMERS_V|CONT_COEFFICIENT|

    COHENS_K

    )

    用于分析两个变量

    26、STATS_F_TEST ( expr1, expr2,

    [STATISTIC|DF_NUM|DF_DEN|ONE_SIDED_SIG|,expr3] |TWO_SIDED_SIG)

    测试两个方差是否有明显的不同

    27、STATS_KS_TEST (expr1 , expr2,STATISTIC|SIG)

    是柯尔莫哥洛夫斯米尔诺夫函数比较两个样品测试他们是否来自相同的总体或有

    相同分布的总体

    28、STATS_MODE ( expr )

    统计模式需要一组值作为它的参数并且返回发生以最大的频率

    Select Deptno, Stats_Mode(Sal)

    FROM emp

    Group By deptno

    ORDER BY deptno, stats_mode(sal);

    29、STATS_MW_TEST (expr1 , expr2,STATISTIC|U_STATISTIC|ONE_SIDED_SIG , expr3|

    TWO_SIDED_SIG)

    一个曼惠特尼测试比较两个独立样本来测试该无效假设,这两个种群具有相同的分布函数与替代并且假设这两个分布函数是不同的

    30、STATS_ONE_WAY_ANOVA( expr1 , expr2,

    SUM_SQUARES_BETWEEN|SUM_SQUARES_WITHIN|DF_BETWEEN|DF_WITHIN|MEAN_SQUARES_BETWEEN|MEAN_SQUARES_WITHIN|F_RATIO|SIG)

    单向方差分析函数(统计一维方差分析)测试差异在意味着(为团体或变量),通过比较两种统计学意义不同的估计方差

    31、STATS_T_TEST_*

    相关函数:

    ■ STATS_T_TEST_ONE: A one-sample t-test

    ■ STATS_T_TEST_PAIRED: A two-sample, paired t-test (also known as acrossed

    t-test)

    ■ STATS_T_TEST_INDEP: A t-test of two independent groups with thesame

    variance (pooled variances)

    ■ STATS_T_TEST_INDEPU: A t-test of two independent groups withunequal

    variance (unpooled variances)

    语法:

    STATS_T_TEST_INDEP|STATS_T_TEST_INDEPU|STATS_T_TEST_ONE|STATS_T_TEST_PAIRED

    ( expr1 , expr2,

    [STATISTIC|ONE_SIDED_SIG|, expr3]|TWO_SIDED_SIG|DF)

    32、STATS_WSR_TEST (expr1 , expr2,STATISTIC|ONE_SIDED_SIG|TWO_SIDED_SIG)

    33、STDDEV (DISTINCT|ALLexpr ) OVER ( analytic_clause ) 返回样本标准偏差

    Select Stddev(Sal) "Deviation"

    FROM emp;

    Select Ename, Sal,

    Stddev(Sal) Over (Order By HireDate)"StdDev"

    FROM emp

    ORDER BY ename, sal, "StdDev";

    34、STDDEV_POP ( expr ) OVER( analytic_clause )

    总体标准偏差计算并返回总体方差的平方根

    SELECT deptno, ename, sal,

    STDDEV_POP(sal) OVER (PARTITION BY deptno) ASpop_std

    From emp

    ORDER BY deptno, ename, sal, pop_std;

    35、STDDEV_SAMP ( expr )OVER ( analytic_clause )

    计算累积样本标准偏差和返回样本方差的平方根

    Select Deptno, Ename, Hiredate, Sal,

    STDDEV_SAMP(sal) OVER (PARTITION BY deptnoORDER BY hiredate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cum_sdev

    From emp

    ORDER BY deptno, ename, hiredate, sal,cum_sdev;

    36、SUM (DISTINCT|ALL expr) OVER ( analytic_clause ) 求和

    Select Mgr, Ename, Sal,

    SUM(sal) OVER (PARTITION BY mgr ORDER BY salRANGE UNBOUNDED PRECEDING) l_csum

    From Emp

    ORDER BY mgr, ename, sal, l_csum;

    37、SYS_XMLAGG ( expr,fmt) 将查询结果生成一个Xml文档

    Select Sys_Xmlagg(Sys_Xmlgen(ename)) Xmlagg

    FROM emp;

    38、VAR_POP ( expr ) OVER ( analytic_clause )

    返回总体方差的一组数字[丢弃Null值]

    39、VAR_SAMP ( expr ) OVER( analytic_clause )

    返回样本方差的一组数字[丢弃Null值]

    Select Var_Pop(Sal),Var_SAMP(Sal)

    FROM emp;

    40、VARIANCE (DISTINCT|ALLexpr ) OVER ( analytic_clause ) 返回回报方差

    SELECT ename, sal, VARIANCE(sal) OVER (ORDERBY hiredate) "Variance"

    FROM emp

    ORDER BY ename, sal, "Variance";

    41、XMLAGG (XMLType_instance order_by_clause) 返回Xml文档

    Select Xmlelement("Department",Xmlagg(Xmlelement("Employee",E.Job||' '||E.Ename) Order By Ename)) As "Dept_list"

    From emp E;

    Select Xmlelement("Department",Xmlagg(Xmlelement("Employee",E.Job||' '||E.Ename))) As "Dept_list"

    From emp E

    GROUP BY e.deptno

  • 相关阅读:
    HDinsight 系列-使用证书登陆中国区Azure
    PowerBI 应用时间智能(生成日期表)
    Hadoop 常用命令之 HDFS命令
    Hadoop分布式集群安装
    DAX:New and returning customers
    Hadoop 安装过程中出现的问题
    ftp 报错 200 Type set to A
    LC.exe 已退出,代码为-1 问题解决
    C# 文件操作
    EfRepository
  • 原文地址:https://www.cnblogs.com/pureEve/p/6559314.html
Copyright © 2011-2022 走看看