zoukankan      html  css  js  c++  java
  • 大型数据库技术测试03——维度清洗

    一、地域维度清洗

    1、上传两个csv文件到Linux(省略)

     

    2、从Linux本地上传两个csv文件到hdfs(省略)

    (1)在Hive中创建gaoxin表

    -- Hive table gaoxin: 29 STRING columns read from comma-delimited text files.
    CREATE TABLE gaoxin (
        ID      STRING,
        QA04    STRING,
        QA05    STRING,
        QA07    STRING,
        QA15    STRING,
        QA19    STRING,
        Hangye  STRING,
        QB03    STRING,
        QB03ONE STRING,
        QB03TWO STRING,
        QB03_1  STRING,
        QB06    STRING,
        QB16    STRING,
        QB16V   STRING,
        Gaoxin  STRING,
        QB16_1  STRING,
        QB16_1V STRING,
        QC02    STRING,
        QC05_0  STRING,
        QC24    STRING,
        QC40    STRING,
        QD01    STRING,
        QD28    STRING,
        QJ09    STRING,
        QJ20    STRING,
        QJ55    STRING,
        QJ74    STRING,
        Diyu    STRING,
        SYEAR   STRING
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

     

    (2)创建diyu表(存放地区代码)

    -- Hive lookup table diyu: (dm, dmms) pairs read from comma-delimited text files.
    -- The cleaning step joins gaoxin.QA19 against dm.
    CREATE TABLE diyu (
        dm   STRING,
        dmms STRING
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

     

     

    3、切换到mysql更改字符集

     

    克隆一个node01窗口登录mysql

    -- Switch the database and server character-set variables to utf8.
    -- No GLOBAL keyword is used, so only the current session's values change.
    SET character_set_database = utf8;
    SET character_set_server = utf8;

    依次执行上述两条命令

     

     

    4、创建aa_diyu(存放清洗后的数据)

     

    -- Hive table aa_diyu: receives the cleaned rows; same 29 STRING columns and
    -- comma-delimited text storage as gaoxin.
    CREATE TABLE aa_diyu (
        ID      STRING,
        QA04    STRING,
        QA05    STRING,
        QA07    STRING,
        QA15    STRING,
        QA19    STRING,
        Hangye  STRING,
        QB03    STRING,
        QB03ONE STRING,
        QB03TWO STRING,
        QB03_1  STRING,
        QB06    STRING,
        QB16    STRING,
        QB16V   STRING,
        Gaoxin  STRING,
        QB16_1  STRING,
        QB16_1V STRING,
        QC02    STRING,
        QC05_0  STRING,
        QC24    STRING,
        QC40    STRING,
        QD01    STRING,
        QD28    STRING,
        QJ09    STRING,
        QJ20    STRING,
        QJ55    STRING,
        QJ74    STRING,
        Diyu    STRING,
        SYEAR   STRING
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

     

     

     

    6、数据清洗

    -- Fill aa_diyu from gaoxin. Every column is copied unchanged except Diyu,
    -- which becomes QA19 concatenated with the matching diyu.dmms.
    -- The inner join keeps only gaoxin rows whose QA19 has a match in diyu.dm.
    -- INSERT INTO appends, so running this statement again adds the rows again.
    INSERT INTO TABLE aa_diyu
    SELECT
        g.ID                   AS ID,
        g.QA04                 AS QA04,
        g.QA05                 AS QA05,
        g.QA07                 AS QA07,
        g.QA15                 AS QA15,
        g.QA19                 AS QA19,
        g.Hangye               AS Hangye,
        g.QB03                 AS QB03,
        g.QB03ONE              AS QB03ONE,
        g.QB03TWO              AS QB03TWO,
        g.QB03_1               AS QB03_1,
        g.QB06                 AS QB06,
        g.QB16                 AS QB16,
        g.QB16V                AS QB16V,
        g.Gaoxin               AS Gaoxin,
        g.QB16_1               AS QB16_1,
        g.QB16_1V              AS QB16_1V,
        g.QC02                 AS QC02,
        g.QC05_0               AS QC05_0,
        g.QC24                 AS QC24,
        g.QC40                 AS QC40,
        g.QD01                 AS QD01,
        g.QD28                 AS QD28,
        g.QJ09                 AS QJ09,
        g.QJ20                 AS QJ20,
        g.QJ55                 AS QJ55,
        g.QJ74                 AS QJ74,
        concat(g.QA19, d.dmms) AS Diyu,
        g.SYEAR                AS SYEAR
    FROM gaoxin g
    INNER JOIN diyu d
        ON g.QA19 = d.dm;

    7、导出到MySQL

     

    (1)navicat新建表

    -- MySQL target table for the Sqoop export; same 29 columns, in the same
    -- order, as the Hive table aa_diyu.
    -- The character set is declared on the table itself so that stored text does
    -- not depend on the defaults of whichever session creates the table.
    CREATE TABLE aa_diyu (
        ID      VARCHAR(255),
        QA04    VARCHAR(255),
        QA05    VARCHAR(255),
        QA07    VARCHAR(255),
        QA15    VARCHAR(255),
        QA19    VARCHAR(255),
        Hangye  VARCHAR(255),
        QB03    VARCHAR(255),
        QB03ONE VARCHAR(255),
        QB03TWO VARCHAR(255),
        QB03_1  VARCHAR(255),
        QB06    VARCHAR(255),
        QB16    VARCHAR(255),
        QB16V   VARCHAR(255),
        Gaoxin  VARCHAR(255),
        QB16_1  VARCHAR(255),
        QB16_1V VARCHAR(255),
        QC02    VARCHAR(255),
        QC05_0  VARCHAR(255),
        QC24    VARCHAR(255),
        QC40    VARCHAR(255),
        QD01    VARCHAR(255),
        QD28    VARCHAR(255),
        QJ09    VARCHAR(255),
        QJ20    VARCHAR(255),
        QJ55    VARCHAR(255),
        QJ74    VARCHAR(255),
        Diyu    VARCHAR(255),
        SYEAR   VARCHAR(255)
    ) DEFAULT CHARSET = utf8;

     

    (2)从hive导出到mysql

     

    # Export the comma-delimited files under the Hive warehouse directory of
    # aa_diyu into the MySQL table aa_diyu, using a single map task.
    # The command is one logical line: every line except the last ends with a
    # backslash and there must be no blank lines in between, otherwise the shell
    # ends the command early.
    # NOTE(review): --password exposes the password on the command line and in
    # shell history; Sqoop's -P option prompts for it instead.
    bin/sqoop export \
      --connect "jdbc:mysql://node01:3306/jianmin?useUnicode=true&characterEncoding=utf-8" \
      --username root \
      --password hadoop \
      --table aa_diyu \
      --num-mappers 1 \
      --export-dir /user/hive/warehouse/test1.db/aa_diyu \
      --input-fields-terminated-by ","

     

     

    结果:

     

     

  • 相关阅读:
    YOLO V5
    YOLO系列(单阶段目标检测)
    优化算法
    算法总结
    图像分类算法
    ResNet网络(里程碑)
    GoogleNet网络(纵横交错)
    VGGNet网络(走向深度)
    AlexNet网络(开山之作)
    案例
  • 原文地址:https://www.cnblogs.com/Lizhichengweidashen/p/14904650.html
Copyright © 2011-2022 走看看