zoukankan      html  css  js  c++  java
  • 每个县里都抽100个户且至少来自3个镇

    需求:每县100户,且至少来自3个镇。若是县里没有3个镇,则取所有镇,但只取100条,若是县里所有镇不足100户,则取所有户。

    方法1:花了4个小时

    分析: 少于3个镇(不含)的县 --6个

             少于100户(含)  的县 --2个

             总共多少个县        --26个 select count(distinct county_pac),count(distinct town_pac) from tw表 where stat_time=20180826 ;

    select county_pac,count(distinct town_pac) 镇个数,count(1) 户个数 from tw表 where stat_time=20180826 group by county_pac order by 2       

    COUNTY_PAC  镇个数  户个数

    1  460204000000 1 535

    2 460210000000 1 1030

    3 460404000000 1 4

    4 460202000000 1 87

    5 469033000000 1 350

    6 460203000000 2 628

    7 460108000000 4 515

    8 460106000000 4 473

    9 460105000000 5 1506

    -----1.先找出县至少有3个镇 ,且每个镇至少有 35户

    drop table cs_thz_1;

    create table cs_thz_1 as

    select * from (   select t.*,row_number() over (partition by county_pac order by residence_id ) rid2 from (     select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac           

    ,row_number() over (partition by town_pac order by dbms_random.value) fam_rid           

    ,count(1) over (partition by town_pac) fam_cnt             

    ,count(distinct town_pac) over (partition by county_pac) town_cnt       

    from tw表 where stat_time=20180826  

    ) t where town_cnt>=3 and fam_cnt>=35 and fam_rid<=35--大于15户的镇,每个镇取15个 ) where rid2<=100 ;

    select count(distinct county_pac),count(1) from cs_thz_1 --已插入20个县,还有6个县需要单独处理

    -----2.再找出县=3个镇 ,且至少一个镇< 35户-----这个没有,若有,每个镇先选1个,再随机选97个

    -----3.再找出县=2个镇 ------------------------              每个镇先选1个,再随机选98个

    -----4.再找出县=1个镇 ------------------------              每个镇先选1个,再随机选99个

    select county_pac,count(distinct town_pac) 镇个数,count(1) 户个数 from tw表 t where stat_time=20180826 and not exists( select 1 from cs_thz_1 a where a.county_pac=t.county_pac ) group by county_pac order by 2

    --2

    insert into cs_thz_1(residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by town_pac order by dbms_random.value) fam_rid    from tw表 t where stat_time=20180826   and not exists( select 1 from cs_thz_1 a where a.county_pac=t.county_pac ) ) where fam_rid=1;

    --3

    insert into cs_thz_1(residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by county_pac order by dbms_random.value) fam_rid    from tw表 t where stat_time=20180826   and exists( select county_pac from cs_thz_1 a where a.county_pac=t.county_pac group by a.county_pac having count(1)<100 and count(distinct town_pac)=2 )   and not exists ( select * from cs_thz_1 a where a.residence_id=t.residence_id ) ) where fam_rid<=98;

    --4

    insert into cs_thz_1(residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by county_pac order by dbms_random.value) fam_rid    from tw表 t where stat_time=20180826   and exists( select county_pac from cs_thz_1 a where a.county_pac=t.county_pac group by a.county_pac having count(1)<100 and count(distinct town_pac)=1 )   and not exists ( select * from cs_thz_1 a where a.residence_id=t.residence_id ) ) where fam_rid<=99;

    -----5.验证:24个县都能取100条(其中 460203000000只涉及2个镇),1个县取4条(涉及1个镇),1个县取87条(涉及1个镇)--共2491条

    select county_pac,count(distinct town_pac) town_cnt,count(1) fam_cnt  from cs_thz_1 group by county_pac order by 3,2 ;

    select * from cs_thz_1 order by county_pac,town_pac select count(1) from cs_thz_1;

    方法2:在方法1的基础上又花了2个小时

    ---1.每个县 的每个镇先拿一条,但只插3个镇(每个镇至少有1条,否则不会入tw表),若不足3个镇(应该要2个),也这样弄 ---2.剔除上面的记录,再每个县随机拿(100-已拿条数)条 drop table cs_thz_2; create table cs_thz_2 as with tmp_3_town as (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (     select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac           ,row_number() over (partition by county_pac order by residence_id) fam_rid2     from (       select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac              ,row_number() over (partition by town_pac order by dbms_random.value) fam_rid        from tw表 where stat_time=20180826     ) where fam_rid=1   ) where fam_rid2<=3 ) select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from tmp_3_town union all select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac from (   select residence_id,city_name_s,county_name_s,town_name_s,country_name_s,city_pac,county_pac,town_pac,country_pac          ,row_number() over (partition by county_pac order by dbms_random.value) fam_rid          ,b.fam_cnt as yi_cnt   from tw表 a   ,(select county_pac as t_county_pac,count(1) fam_cnt from tmp_3_town group by county_pac ) b   where a.stat_time=20180826 and a.county_pac=b.t_county_pac and not exists(     select * from tmp_3_town t where t.residence_id=a.residence_id   ) ) t1 where t1.fam_rid<=(100-yi_cnt) ;

    -----3.验证:24个县都能取100条(其中 460203000000 只涉及2个镇),1个县取4条(涉及1个镇),1个县取87条(涉及1个镇)--共2491条      COUNTY_PAC  镇个数  户个数 1  460204000000 1 535 2 460210000000 1 1030 3 460404000000 1 4 4 460202000000 1 87 5 469033000000 1 350 6 460203000000 2 628 7 460108000000 4 515 8 460106000000 4 473 9 460105000000 5 1506 10 469001000000 7 6846

    select county_pac,count(distinct town_pac) town_cnt,count(1) fam_cnt  from cs_thz_2 group by county_pac order by 3,2 ;

    select count(distinct county_pac),count(1) from cs_thz_2 ;--26 2424

    -----4.结果取数一样 select county_pac,count(distinct town_pac) town_cnt,count(1) fam_cnt  from cs_thz_1 group by county_pac order by 3,2 ;

  • 相关阅读:
    QT POST/GET HTTP操作
    PHP生成json
    Windows 获取进程ID
    易语言 多个窗口
    易语言 内存修改框架
    易语言 MD5生成
    易语言 获取验证码
    易语言网页登录 POST
    cs1.6 8倍镜
    HTMLTestRunner.py(Python3)
  • 原文地址:https://www.cnblogs.com/jiangqingfeng/p/9544049.html
Copyright © 2011-2022 走看看