建表
-- Table gulivideo_ori: delimited text rows with eight scalar columns and two
-- array columns (category, relatedId) whose elements are separated by "&".
-- NOTE(review): the field delimiter below is a single space as written; if the
-- data file is tab-separated, a tab may have been lost when copying -- confirm.
CREATE TABLE gulivideo_ori (
    videoId    STRING,
    uploader   STRING,
    age        INT,
    category   ARRAY<STRING>,
    length     INT,
    views      INT,
    rate       FLOAT,
    ratings    INT,
    comments   INT,
    relatedId  ARRAY<STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY " "
    COLLECTION ITEMS TERMINATED BY "&"
STORED AS TEXTFILE;
建表2
-- Table user_action: comma-delimited text rows (user_id, goods_id, user_action,
-- month, day).
-- The storage clause is written out explicitly: HiveQL comments start with
-- `--`, so a `#`-prefixed line is not ignored and breaks the statement.
CREATE TABLE user_action (
    user_id      STRING,
    goods_id     STRING,
    user_action  INT,
    month        INT,
    day          INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
加载数据
命令:load data local inpath '/iphone.csv' into table goods_sail_info;
【问题】:如何让导入的中文文本不出现乱码
直接将 Hive 底层的编码格式改成 GBK,与 Windows 保持一致(不推荐):
-- Set the SerDe property 'serialization.encoding' to GBK for table AAA.
-- NOTE(review): AAA is presumably a placeholder table name -- substitute the real one.
ALTER TABLE AAA
    SET SERDEPROPERTIES ('serialization.encoding' = 'GBK');
建表并加载数据
-- External table dblab.bigdata_user reading delimited text files from
-- /bigdatacase/dataset.
-- NOTE(review): the field delimiter below is a single space as written; if the
-- dataset is tab-separated, a tab may have been lost when copying -- confirm.
CREATE EXTERNAL TABLE dblab.bigdata_user (
    id             INT,
    uid            STRING,
    item_id        STRING,
    behavior_type  INT,
    item_category  STRING,
    visit_date     DATE,
    province       STRING
)
COMMENT 'Welcome to xmu dblab!'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ' '
STORED AS TEXTFILE
LOCATION '/bigdatacase/dataset';
【hive HQL】
删表:DROP TABLE [IF EXISTS] table_name;
清空表:truncate table 表名;
删除数据:DELETE FROM table_name [WHERE 条件];(仅支持开启 ACID 事务的表)
【数据存储-遇坑】
1 字符清洗;
特殊符号去除;
2 乱码;
Kettle中:在每个转换中的文本文件输出步骤中,编码格式为 utf-8。
...................................................