[Author]: kwu
Spark SQL can be driven from a shell script to run SQL: spark-sql supports options similar to Hive's -e, -f, and -i (run an inline statement, run a SQL file, and run an initialization file, respectively).
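For reference, the three options work as follows. This is a minimal sketch: query.sql is a hypothetical file, while the other paths are the ones used in the script below.

# -e: run an inline SQL statement
/opt/modules/spark/bin/spark-sql -e "select count(1) from dms.tracklog_5min"

# -f: run the statements in a SQL file
/opt/modules/spark/bin/spark-sql -f /opt/bin/spark_opt/query.sql

# -i: run an initialization file first (e.g. set/cache statements), then continue
/opt/modules/spark/bin/spark-sql -i /opt/bin/spark_opt/init.sql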
1. The scheduled script
#!/bin/sh
# compute the real-time stock click analysis (today vs. yesterday) and export it to MySQL
yesterday=`date --date='1 days ago' +%Y%m%d`
/opt/modules/spark/bin/spark-sql -i /opt/bin/spark_opt/init.sql --master spark://10.130.2.20:7077 --executor-memory 6g --total-executor-cores 45 --conf spark.ui.port=4075 -e "
insert overwrite table st.stock_realtime_analysis PARTITION (DTYPE='01')
select t1.stockId as stockId,
t1.url as url,
t1.clickcnt as clickcnt,
0,
round((t1.clickcnt / coalesce(t2.clickcntyesday, 0) - 1) * 100, 2) as LPcnt, -- day-over-day change in %; division by zero yields NULL in Spark SQL, so LPcnt is NULL for stocks with no clicks yesterday
'01' as type,
t1.analysis_date as analysis_date,
t1.analysis_time as analysis_time
from (select stock_code stockId,
concat('http://stockdata.stock.hexun.com/', stock_code,'.shtml') url,
count(1) clickcnt,
substr(from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),1,10) analysis_date,
substr(from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),12,8) analysis_time
from dms.tracklog_5min
where stock_type = 'STOCK'
and day = from_unixtime(unix_timestamp(), 'yyyyMMdd')
group by stock_code
order by clickcnt desc limit 20) t1
left join (select stock_code stockId, count(1) clickcntyesday
from dms.tracklog_5min a
where stock_type = 'STOCK'
and substr(datetime, 1, 10) = date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'),1)
and substr(datetime, 12, 5) < substr(from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss'), 12, 5)
and day = '${yesterday}'
group by stock_code) t2
on t1.stockId = t2.stockId;
"
# push the results to MySQL for the charts database; '\001' (Hive's default field delimiter)
# is assumed for --fields-terminated-by, and the command also needs an --export-dir pointing
# at the table's HDFS directory, which is not shown here
sqoop export --connect jdbc:mysql://10.130.2.245:3306/charts --username guojinlian --password Abcd1234 --table stock_realtime_analysis --fields-terminated-by '\001'
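To actually run this on a schedule, register the script with cron. A minimal sketch, assuming the script above is saved as /opt/bin/spark_opt/stock_realtime.sh and should refresh every 5 minutes (both the path and the interval are illustrative):

# crontab entry: run every 5 minutes, logging output for troubleshooting
*/5 * * * * /opt/bin/spark_opt/stock_realtime.sh >> /opt/bin/spark_opt/stock_realtime.log 2>&1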