Date time functions 默认数据格式为yyyy-MM-dd格式
DataFrame数据
val df = Seq(
("A", "2019-01-10", "2019-05-02"),
("B", "2019-01-01", "2019-02-04"),
("D", "2019-01-09", "2019-03-02"))
.toDF("user_id", "start_time", "end_time")
1. def add_months(startDate: Column, numMonths: Int): Column
add_months 在给定日期的月份上增加/减少 n 个月,其中正数表示增加,负数表示减少。
df.select(add_months(col("start_time"), 1).as("add_months")).show() +----------+ |add_months| +----------+ |2019-02-10| |2019-02-01| |2019-02-09| +----------+
2. def current_date(): Column
获取当前时间的年月日
df.select(current_date()).show() +--------------+ |current_date()| +--------------+ | 2020-04-01| | 2020-04-01| | 2020-04-01| +--------------+
3. def current_timestamp(): Column
获取当前时间的时间戳
df.select(current_timestamp()).show() +--------------------------------------+ | current_timestamp() | +--------------------------------------+ |2020-04-01T09:40:03.051+08:00 | |2020-04-01T09:40:03.051+08:00 | |2020-04-01T09:40:03.051+08:00 | +--------------------------------------+
4. def date_add(start: Column, days: Int): Column
在给定日期上增加指定的天数(days),返回增加后的日期
df.select(date_add(col("start_time"),1)).show() +-----------------------+ |date_add(start_time, 1)| +-----------------------+ | 2019-01-11| | 2019-01-02| | 2019-01-10| +-----------------------+
5. def date_format(dateExpr: Column, format: String): Column
将时间转化为某种格式的字符串
df.select(date_format(col("start_time"),"yyyy-MM-dd")).show() +-----------------------------------+ |date_format(start_time, yyyy-MM-dd)| +-----------------------------------+ | 2019-01-10| | 2019-01-01| | 2019-01-09| +-----------------------------------+
6. def date_sub(start: Column, days: Int): Column
在给定日期上减去指定的天数(days),返回减去后的日期
df.select(date_sub(col("start_time"),1)).show() +-----------------------+ |date_sub(start_time, 1)| +-----------------------+ | 2019-01-09| | 2018-12-31| | 2019-01-08| +-----------------------+
7. def date_trunc(format: String, timestamp: Column): Column
时间截取:按指定的单位截断时间戳,其余(更小单位)的月、日设置为01,时分秒设置成00
date_trunc ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM", "DAY", "DD", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"] 参数格式
df.select(date_trunc("DAY",col("start_time"))).show() +---------------------------+ |date_trunc(DAY, start_time)| +---------------------------+ | 2019-01-10 00:00:00| | 2019-01-01 00:00:00| | 2019-01-09 00:00:00| +---------------------------+
8. def datediff(end: Column, start: Column): Column
计算两个日期相差的天数(end 减去 start)
df.select(datediff(col("end_time"),col("start_time"))).show() +------------------------------+ |datediff(end_time, start_time)| +------------------------------+ | 112| | 34| | 52| +------------------------------+
8. def dayofmonth(e: Column): Column
返回给定日期,在本月是第几天
df.select(dayofmonth(col("start_time"))).show() +----------------------+ |dayofmonth(start_time)| +----------------------+ | 10| | 1| | 9| +----------------------+
9. def dayofweek(e: Column): Column
返回给定的时间在本周为第几天 ,dayofweek (1 = Sunday, 2 = Monday, ..., 7 = Saturday)
df.select(dayofweek(col("start_time"))).show() +---------------------+ |dayofweek(start_time)| +---------------------+ | 5| | 3| | 4| +---------------------+
10. def dayofyear(e: Column): Column
返回给定的时间在本年为第几天
df.select(dayofyear(col("start_time"))).show() +---------------------+ |dayofyear(start_time)| +---------------------+ | 10| | 1| | 9| +---------------------+
11. def from_unixtime(ut: Column, f: String): Column
将Unix时间戳(自 1970-01-01 00:00:00 UTC 起的秒数)转换为指定格式的时间字符串
val df = Seq( ("A", "2019-02-10", "0"), ("B", "2020-04-01", "0"), ("D", "2019-01-09", "0")) .toDF("user_id", "start_time", "end_time").repartition(3) df.select(from_unixtime(col("end_time"),"yyyy-MM-dd")).show() +-----------------------------------+ |from_unixtime(end_time, yyyy-MM-dd)| +-----------------------------------+ | 1970-01-01| | 1970-01-01| | 1970-01-01| +-----------------------------------+
12. def from_utc_timestamp(ts: Column, tz: String): Column
给定一个UTC时间(如 '2017-07-14 02:40:00.0'),将其转换为指定时区(time zone)的时间。注意:下面的示例实际调用的是其逆函数 to_utc_timestamp,即把指定时区的时间转换为UTC时间
df.select(to_utc_timestamp(col("start_time"),"Asia/Seoul")).show() +----------------------------------------+ |to_utc_timestamp(start_time, Asia/Seoul)| +----------------------------------------+ | 2019-02-09 15:00:00| | 2020-03-31 15:00:00| | 2019-01-08 15:00:00| +----------------------------------------+
13. def hour(e: Column): Column
取出时间中的小时
val df = Seq( ("A", "2019-02-10", "2019-02-10 00"), ("B", "2020-04-01", "2020-04-01 02"), ("D", "2019-01-09", "2019-01-09 10")) .toDF("user_id", "start_time", "end_time") df.select(hour(col("end_time"))).show() +--------------+ |hour(end_time)| +--------------+ | 0| | 2| | 10| +--------------+
14. def last_day(e: Column): Column
返回日期所在月的最后一天,如2020-03-04,返回2020-03-31
df.select(last_day(col("end_time"))).show() +------------------+ |last_day(end_time)| +------------------+ | 2019-02-28| | 2020-04-30| | 2019-01-31| +------------------+
15. def minute(e: Column): Column
提取给定时间中的分钟数
val df = Seq( ("A", "2019-02-10", "2019-02-10 00:34"), ("B", "2020-04-01", "2020-04-01 02:21"), ("D", "2019-01-09", "2019-01-09 10:54")) .toDF("user_id", "start_time", "end_time") df.select(minute(col("end_time"))).show() +----------------+ |minute(end_time)| +----------------+ | 34| | 21| | 54| +----------------+
16. def month(e: Column): Column
从时间/时间戳/字符串中提取月份
df.select(month(col("end_time"))).show() +---------------+ |month(end_time)| +---------------+ | 2| | 4| | 1| +---------------+
17. def months_between(date1: Column, date2: Column): Column
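返回两个日期之间相差的月数(date1 减去 date2,结果可以为小数)。注:下面的示例输出与上文 df 中的 end_time 不一致,推测运行时 end_time 分别为 2019-02-13、2020-04-05、2019-01-13(待确认)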
df.select(months_between(col("end_time"),col("start_time"))).show() +------------------------------------+ |months_between(end_time, start_time)| +------------------------------------+ | 0.09677419| | 0.12903226| | 0.12903226| +------------------------------------+
18. def next_day(date: Column, dayOfWeek: String): Column
给定一个日期列,返回该日期之后第一个为指定星期几(dayOfWeek)的日期
accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
df.select(next_day(col("end_time"),"Sun")).show() +-----------------------+ |next_day(end_time, Sun)| +-----------------------+ | 2019-02-17| | 2020-04-12| | 2019-01-20| +-----------------------+
19. def quarter(e: Column): Column
返回给定日期/时间戳/字符串所在的季度(将1年4等分,取值范围 1 到 4)
Examples:
df.select(quarter(col("end_time"))).show() +-----------------+ |quarter(end_time)| +-----------------+ | 1| | 2| | 1| +-----------------+
21. def second(e: Column): Column
返回给定时间的秒
df.select(second(col("end_time"))).show() +----------------+ |second(end_time)| +----------------+ | 11| | 32| | 41| +----------------+
22. def to_date(e: Column, fmt: String): Column
按指定的格式(fmt)将列转换为日期(date)列
df.select(to_date(col("start_time"), "yyyy-MM-dd").as("time")).show() +----------+ | time| +----------+ |2019-02-10| |2020-04-01| |2019-01-09| +----------+
23. def to_timestamp(s: Column, fmt: String): Column
按指定的格式(fmt)将列转换为时间戳(timestamp)列;不指定格式时按默认格式解析
df.select(to_timestamp(col("start_time"), "yyyy-MM-dd").as("time")).show() +-------------------+ | time| +-------------------+ |2019-02-10 00:00:00| |2020-04-01 00:00:00| |2019-01-09 00:00:00| +-------------------+ df.select(to_timestamp(col("start_time")).as("time")).show() +-------------------+ | time| +-------------------+ |2019-02-10 00:00:00| |2020-04-01 00:00:00| |2019-01-09 00:00:00| +-------------------+
24. def trunc(date: Column, format: String): Column
trunc截取某部分的日期,其他部分默认为01,第二个参数 ["year", "yyyy", "yy", "mon", "month", "mm"]
df.select( trunc(col("start_time"), "MM")).show() +---------------------+ |trunc(start_time, MM)| +---------------------+ | 2019-02-01| | 2020-04-01| | 2019-01-01| +---------------------+
25. def year(e: Column): Column
提取给定时间的年份(year)
df.select( year(col("start_time"))).show() +----------------+ |year(start_time)| +----------------+ | 2019| | 2020| | 2019| +----------------+