zoukankan      html  css  js  c++  java
  • SQL分析函数学习

    一、分析函数语法:

    function_name(<argument>,<argument>...) over(<partition_Clause><order by_Clause><windowing_Clause>);
    

    function_name():函数名称

    argument:参数

    over( ):开窗函数

    partition_Clause:分区子句,数据记录集分组,group by...

    order by_Clause:排序子句,数据记录集排序,order by...

    windowing_Clause:开窗子句,定义分析函数在操作行的集合,三种开窗方式:rows、range、Specifying

    注:使用开窗子句时一定要有排序子句!!!

    二、分析函数如下:

    1、count() over()  :统计分区中各组的行数,partition by 可选,order by 可选

    1、select name, sex, age, count(*) over() from emp; --总计数
    2、select name, sex, age, count(*) over(order by age) from emp; --递加计数
    3、select name, sex, age, count(*) over(partition by sex) from emp; --分组计数
    4、select name, sex, age, count(*) over(partition by sex order by age) from emp;--分组递加计数
    

    2、sum() over()  :统计分区中记录的总和,partition by 可选,order by 可选

    1、select name, sex, age, sum(salary) over() from emp; --总累计求和
    2、select name, sex, age, sum(salary) over(order by age) from emp; --递加累计求和
    3、select name, sex, age, sum(salary) over(partition by sex) from emp; --分组累计求和
    4、select name, sex, age, sum(salary) over(partition by sex order by age) from emp; --分组递加累计求和
    

    3、avg() over()  :统计分区中记录的平均值,partition by 可选,order by 可选

    1、select name, sex, age, avg(salary) over() from emp; --总平均值
    2、select name, sex, age, avg(salary) over(order by age) from emp; --递加求平均值
    3、select name, sex, age, avg(salary) over(partition by sex) from emp; --分组求平均值
    4、select name, sex, age, avg(salary) over(partition by sex order by age) from emp; --分组递加求平均值
    

    4、min() over() :统计分区中记录的最小值,partition by 可选,order by 可选

         max() over() :统计分区中记录的最大值,partition by 可选,order by 可选

    1、select name, sex, age, salary, min(salary) over() from emp; --求总最小值
    2、select name, sex, age, salary, min(salary) over(order by age) from emp; --递加求最小值
    3、select name, sex, age, salary, min(salary) over(partition by sex) from emp; --分组求最小值
    4、select name, sex, age, salary, min(salary) over(partition by sex order by age) from emp; --分组递加求最小值
    
    
    5、select name, sex, age, salary, max(salary) over() from emp; --求总最大值
    6、select name, sex, age, salary, max(salary) over(order by age) from emp; --递加求最大值
    7、select name, sex, age, salary, max(salary) over(partition by sex) from emp; --分组求最大值
    8、select name, sex, age, salary, max(salary) over(partition by sex order by age) from emp; --分组递加求最大值
    

    5、rank() over()  :跳跃排序,partition by 可选,order by 必选

    1、select name, age, rank() over(partition by job order by age) from emp;
    2、select name, age, rank() over(order by age) from emp;
    

    6、dense_rank() :连续排序,partition by 可选,order by 必选

    1、select name, age, dense_rank() over(partition by job order by age) from emp;
    2、select name, age, dense_rank() over(order by age) from emp;
    

    7、row_number() over() :排序,无重复值,partition by 可选,order by 必选

    1、select name, age, row_number() over(partition by job order by age) from emp;
    2、select name, age, row_number() over(order by age) from emp;
    

    8、ntile(n) over() :partition by 可选,order by 必选

        n表示将分区内记录平均分成n份,多出的按照顺序依次分给前面的组

    select name, salary, ntile(3) over(order by salary desc) from emp;
    select name, salary, ntile(3) over(partition by job order by salary desc) from emp;
    

    9、first_value() over() :取出分区中第一条记录的字段值,partition by 可选,order by 可选

         last_value() over() :取出分区中最后一条记录的字段值,partition by 可选,order by 可选

    1、select name, first_value(salary) over() from emp;
    2、select name, first_value(salary) over(order by salary desc) from emp;
    3、select name, first_value(salary) over(partition by job) from emp;                                                           
    4、select name, first_value(salary) over(partition by job order by salary desc) from emp;
    
    
    5、select name, last_value(ename) over() from emp;
    6、select name, last_value(ename) over(order by salary desc) from emp;
    7、select name, last_value(ename) over(partition by job) from emp;
    8、select name, last_value(ename) over(partition by job order by salary desc) from emp;
    

    10、first :从DENSE_RANK返回的集合中取出排在最前面的一个值的行

          last :从DENSE_RANK返回的集合中取出排在最后面的一个值的行

    select job, max(salary) keep(dense_rank first order by salary desc),
    max(salary) keep(dense_rank last order by salary desc) from emp
    group by job;
    

    11、lag() over() :取出前n行数据,partition by 可选,order by 必选

          lead() over() :取出后n行数据,partition by 可选,order by 必选

    1、select name, age, lag(age,1,0) over(order by salary), 
    lead(age,1,0) over(order by salary) from emp;
     
    2、select name, age, lag(age,1) over(partition by sex order by salary),
    lead(age,1) over(partition by sex order by salary) from emp;
    

    12、ratio_to_report(a) over(partition by b) :求按照b分组后a的值在所属分组中总值的占比,a的值必须为数值或数值型字段

          partition by 可选,order by 不可选

    1、select name, job, salary, ratio_to_report(1) over() from emp; --给每一行赋值1,求当前行在总值的占比,总是0.1
    2、select name, job, salary, ratio_to_report(salary) over() from emp; --当前行的值在所有数据中的占比
    3、select name, job, salary, ratio_to_report(1) over(partition by job) from emp; --给每一行赋值1,求当前行在分组后的组内总值的占比
    4、select name, job, salary, ratio_to_report(salary) over(partition by job) from emp; --当前行的值在分组后组内总值占比
    

    13、percent_rank() over()  :partition by 可选,order by 必选

         所在组排名序号-1除以该组所有的行数-1,排名跳跃排序

    1、select name, job, salary, percent_rank() over(order by salary) from emp;
    2、select name, job, salary, percent_rank() over(partition by job order by salary) from emp;
    

    14、cume_dist() over() :partition by 可选,order by必选

    所在组排名序号除以该组所有的行数,注意对于重复行,计算时取重复行中的最后一行的位置

    1、select name, job, salary, cume_dist() over(order by salary) from emp;
    2、select name, job, salary, cume_dist() over(partition by job order by salary) from emp;
    

    15、precentile_cont( x ) within group(order by ...) over()    :over()中partition by可选,order by 不可选

    x为输入的百分比,是0-1之间的一个小数,返回该百分比位置的数据,若没有则返回以下计算值(r):

    a=1+( x *(N-1) )  x为输入的百分比,N为分区内的记录的行数

    b=ceil ( a )  向上取整

    c = floor( a ) 向下取整

    r=a * 百分比位置上一条数据 + b * 百分比位置下一条数据

    1、select name, job, salary, percentile_cont(0.5) within group(order by salary) over() from emp;
    2、select name, job, salary, percentile_cont(0.5) within group(order by salary) over(partition by job) from emp;
    

    16、precentile_disc( x ) within group(order by ...) over()   :over()中partition by可选,order by 不可选

    x为输入的百分比,是0-1之间的一个小数,返回百分比位置对应位置上的数据值,若没有对应数据值,就取大于该分布值的下一个值

    1、select name, job, salary, percentile_disc(0.5) within group(order by salary) over()from emp;
    2、select name, job, salary, percentile_disc(0.5) within group(order by salary) over(partition by job) from emp;
    

    17、stddev() over():计算样本标准差,只有一行数据时返回0,partition by 可选,order by 可选

          stddev_samp() over():计算样本标准差,只有一行数据时返回null,partition by 可选,order by 可选

          stddev_pop() over():计算总体标准差,partition by 可选,order by 可选

    1、select stddev(stu_age) over() from student; --计算所有记录的样本标准差
    2、select stddev(stu_age) over(order by stu_age) from student; --计算递加的样本标准差
    3、select stddev(stu_age) over(partition by stu_major) from student; --计算分组的样本标准差
    4、select stddev(stu_age) over(partition by stu_major order by stu_age) from student; --计算分组递加的样本标准差
     
     
    5、select stddev_samp(stu_age) over() from student; --计算所有记录的样本标准差
    6、select stddev_samp(stu_age) over(order by stu_age) from student; --计算递加的样本标准差
    7、select stddev_samp(stu_age) over(partition by stu_major) from student; --计算分组的样本标准差
    8、select stddev_samp(stu_age) over(partition by stu_major order by stu_age) from student; --计算分组递加的样本标准差
     
     
    9、select stddev_pop(stu_age) over() from student; --计算所有记录的总体标准差
    10、select stddev_pop(stu_age) over(order by stu_age) from student; --计算递加的总体标准差
    11、select stddev_pop(stu_age) over(partition by stu_major) from student; --计算分组的总体标准差
    12、select stddev_pop(stu_age) over(partition by stu_major order by stu_age) from student;--计算分组递加的总体标准差
    

    18、variance() over():计算样本方差,只有一行数据时返回0,partition by 可选,order by 可选

           var_samp() over():计算样本方差,只有一行数据时返回null,partition by 可选,order by 可选

           var_pop() over():计算总体方差,partition by 可选,order by 可选

    1、select variance(stu_age) over() from student; --计算所有记录的样本方差
    2、select variance(stu_age) over(order by stu_age) from student; --计算递加的样本方差
    3、select variance(stu_age) over(partition by stu_major) from student; --计算分组的样本方差
    4、select variance(stu_age) over(partition by stu_major order by stu_age) from student; --计算分组递加的样本方差
     
     
    5、select var_samp(stu_age) over() from student; --计算所有记录的样本方差
    6、select var_samp(stu_age) over(order by stu_age) from student; --计算递加的样本方差
    7、select var_samp(stu_age) over(partition by stu_major) from student; --计算分组的样本方差
    8、select var_samp(stu_age) over(partition by stu_major order by stu_age) from student; --计算分组递加的样本方差
     
     
    9、select var_pop(stu_age) over() from student; --记录所有就的总体方差
    10、select var_pop(stu_age) over(order by stu_age) from student; --计算递加的总体方差
    11、select var_pop(stu_age) over(partition by stu_major) from student; --计算分组的总体方差
    12、select var_pop(stu_age) over(partition by stu_major order by stu_age) from student;--计算分组递加的样本方差
    

      stddev()=sqrt( variance() )     sqrt()--求开方

           stddev_samp()=sqrt( var_samp() )

           stddec_pop=sqrt( var_pop() )

    19、covar_samp over():返回一对表达式的样本协方差,partition by 可选,order by 可选

           covar_pop over(): 返回一堆表达式的总体协方差,partition by 可选,order by 可选

    1、select covar_samp(stu_age,line) over() from student; --计算所有记录的样本协方差
    2、select covar_samp(stu_age,line) over(order by stu_age) from student; --计算递加的样本协方差
    3、select covar_samp(stu_age,line) over(partition by stu_major) from student; --计算分组的样本协方差
    4、select covar_samp(stu_age,line) over(partition by stu_major order by stu_age) from student; --计算分组递加的样本协方差
     
     
    5、select covar_pop(stu_age,line) over() from student; --计算所有记录的总体协方差
    6、select covar_pop(stu_age,line) over(order by stu_age) from student; --计算递加的总体协方差
    7、select covar_pop(stu_age,line) over(partition by stu_major) from student; --计算分组的总体协方差
    8、select covar_pop(stu_age,line) over(partition by stu_major order by stu_age) from student; --计算分组递加的总体协方差
    

    20、corr() over() :返回一对表达式的相关系数,partition by 可选,order by 可选

    1、select corr(stu_age,line) over() from student; --计算所有记录的相关系数
    2、select corr(stu_age,line) over(order by stu_age) from student; --计算递加的相关系数
    3、select corr(stu_age,line) over(partition by stu_major) from student; --计算分组的相关系数
    4、select corr(stu_age,line) over(partition by stu_major order by stu_age) from student; --计算分组递加的相关系数
    

    21、REGR_ (Linear Regression) Functions:这些线性回归函数适合最小二乘法回归线,有9个不同的回归函数可使用

      

      

  • 相关阅读:
    javascript 获取get参数方法(获取url参数方法)详解
    javascript标签页切换功能(极简代码)
    mb_substr()截取中文方法的详解 (加上‘utf-8’,字符串截取不到的问题详解)
    mysql 垂直分表技术的实战演练,有实战代码。
    php+ajax 实战 (使用ajax小技巧)
    最近开发的ECG项目
    android操作线程各种方法解析
    SQLite数据库在多线程写锁文件的解决办法
    SILVERLIGHT 应急卫生模拟演练项目之childwindow
    SILVERLIGHT 应急卫生模拟演练项目之GRID布局
  • 原文地址:https://www.cnblogs.com/Java-125/p/12034985.html
Copyright © 2011-2022 走看看