zoukankan      html  css  js  c++  java
  • 某互金数据分析笔试题学习

    # List the existing schemas, then create the practice schema and make it current.
    # (In MySQL, SCHEMA is a synonym for DATABASE.)
    show schemas;
    create schema ship;
    use ship;
    # List the tables of the now-current schema.
    show tables;
    # Survey table: one row per manager, with demographics and five question scores.
    # q4 and q5 allow NULL (manager 4 below has no answer for either).
    create table leadership
    (
        manager int(4) primary key,
        date    date not null,
        country char(20) not null,
        gender  char(20) not null,
        age     int(11) not null,
        q1      int(11) not null,
        q2      int(11) not null,
        q3      int(11) not null,
        q4      int(11),
        q5      int(11)
    );
    # The raw dates are MM/DD/YY strings. Parse them with str_to_date so the column
    # can stay a real, NOT NULL DATE instead of being altered to char(20).
    # manager is an integer key, so it is inserted as a number rather than a string.
    insert into leadership (manager, date, country, gender, age, q1, q2, q3, q4, q5)
    values
        (1, str_to_date('10/24/08', '%m/%d/%y'), 'US', 'M', 32, 5, 4, 5, 5, 5),
        (2, str_to_date('10/28/08', '%m/%d/%y'), 'US', 'F', 40, 3, 5, 2, 5, 5),
        (3, str_to_date('10/01/08', '%m/%d/%y'), 'UK', 'F', 25, 3, 5, 5, 5, 2),
        (4, str_to_date('10/12/08', '%m/%d/%y'), 'UK', 'M', 39, 2, 3, 4, null, null),
        (5, str_to_date('05/01/09', '%m/%d/%y'), 'UK', 'F', 99, 2, 2, 1, 2, 1);
    # Inspect the loaded rows.
    select * from leadership;
    # Question 1: sorting -- list gender and age ordered by gender.
    select
        gender,
        age
    from leadership
    order by gender asc;
    # Question 2: binning -- assign each row to a 5-year age bin and sort by bin.
    select
        leadership.*,
        ceiling(age / 5) as 分箱
    from leadership
    order by 分箱 asc;
    # Question 3: fill missing values with the column mean.
    show variables like 'SQL_SAFE_UPDATES';
    # The update below filters on non-key columns, so safe-update mode is turned off
    # for it; the previous setting is saved here and restored afterwards.
    set @prev_sql_safe_updates = @@sql_safe_updates;
    set sql_safe_updates = 0;
    # MySQL rejects an UPDATE whose subquery selects directly from the table being
    # updated, e.g.
    #   update leadership set q4 = (select avg(q4) from leadership) where q4 is null;
    # Workarounds: (1) join against the aggregate, or (2) wrap the subquery in one
    # more derived table. The join form is used here so q4 and q5 are both filled in
    # one statement (the seed data has NULL in both columns).
    # avg() ignores NULLs; the mean is rounded explicitly because q4/q5 are integers.
    update leadership as l
    cross join (
        select
            avg(q4) as q4_mean,
            avg(q5) as q5_mean
        from leadership
    ) as m
    set
        l.q4 = coalesce(l.q4, round(m.q4_mean)),
        l.q5 = coalesce(l.q5, round(m.q5_mean))
    where l.q4 is null
       or l.q5 is null;
    set sql_safe_updates = @prev_sql_safe_updates;
    # Question 4: de-duplication.
    # Show the character-set variables (presumably checked before importing the
    # Chinese-named columns -- confirm).
    show variables like 'char%';
    create table AAA (
        brand  varchar(40),
        产量   int(20),
        订单量 int(20),
        销售额 int(30)
    );
    # Import the CSV file. Records are separated by line breaks, not spaces;
    # '\r\n' assumes the file was saved on Windows (matching the path) -- use '\n'
    # if the file has Unix line endings.
    load data local infile "C:/Users/Administrator/Desktop/AAA.csv"
    into table AAA
    fields terminated by ','
    lines terminated by '\r\n';
    show full columns from AAA;
    select * from AAA;
    # Find every row whose (产量, 订单量) pair occurs more than once.
    select *
    from AAA
    where (AAA.产量, AAA.订单量) in (
        select 产量, 订单量
        from AAA
        group by 产量, 订单量
        having count(*) > 1
    );
    # De-duplicated result: one row per (产量, 订单量) pair together with how many
    # times it occurs. Every selected column is either grouped or aggregated, so the
    # query is valid under ONLY_FULL_GROUP_BY.
    select
        产量,
        订单量,
        count(*) as 出现次数
    from AAA
    group by 产量, 订单量;
    # Question 5: regular expressions. MySQL matches text patterns with LIKE or
    # REGEXP; a regular expression is a special string used to match text.
    # Returns 1/0 per row depending on whether brand contains a character from the
    # bracket set.
    # NOTE(review): the '+' sits inside the brackets, so it is a literal plus sign,
    # not a "one or more" quantifier -- confirm this is intended.
    select
        brand REGEXP '[a-z0-9+]'
    from AAA;
    # Question 6: pivot the table from long format (one row per student and subject)
    # to wide format (one row per student, one column per subject).
    create table a1 (
        ID   int(4),
        姓名 varchar(10),
        科目 varchar(10),
        成绩 int(20)
    );
    # Records are separated by line breaks, not spaces; '\r\n' assumes the file was
    # saved on Windows (matching the path) -- use '\n' for Unix line endings.
    load data local infile "C:/Users/Administrator/Desktop/a1.csv"
    into table a1
    fields terminated by ','
    lines terminated by '\r\n';
    select * from a1;
    # Conditional aggregation: each sum() only adds the scores of its own subject.
    # 姓名 is part of the grouping key so the query is valid under
    # ONLY_FULL_GROUP_BY.
    select
        ID,
        姓名,
        sum(case when 科目 = '语文' then 成绩 else 0 end) as 语文成绩,
        sum(case when 科目 = '数学' then 成绩 else 0 end) as 数学成绩
    from a1
    group by ID, 姓名;

     ------------

    1. 要求:使用 R、Python、SAS 或 SQL 中的任意一种实现皆可,欢迎大家交流学习。

    2.表格文件已上传。

  • 相关阅读:
    Flink 读取 Kafka 数据 (极简版)
    自动化测试模型
    C语言字符串处理库函数大全(转)
    c语言笔记
    c语言的自动类型转换(转)
    itest(爱测试)开源接口测试&敏捷测试管理平台8.1.0发布
    itest(爱测试)开源接口测试&敏捷测试&极简项目管理 8.0.0 发布,测试重大升级
    工控机折腾小记
    linux
    华为交换机服务端策略路由配置
  • 原文地址:https://www.cnblogs.com/ao-yu-a/p/11068086.html
Copyright © 2011-2022 走看看