Date time functions 默认数据格式为yyyy-MM-dd格式
DataFrame数据
val df = Seq(
("A", "2019-01-10", "2019-05-02"),
("B", "2019-01-01", "2019-02-04"),
("D", "2019-01-09", "2019-03-02"))
.toDF("user_id", "start_time", "end_time")
1. def add_months(startDate: Column, numMonths: Int): Column
add_months 在给定日期的月份上增加/减少n个月,其中正数表示增加,负数表示减少。
df.select(add_months(col("start_time"), 1).as("add_months")).show()
+----------+
|add_months|
+----------+
|2019-02-10|
|2019-02-01|
|2019-02-09|
+----------+
2. def current_date(): Column
获取当前时间的年月日
df.select(current_date()).show()
+--------------+
|current_date()|
+--------------+
| 2020-04-01|
| 2020-04-01|
| 2020-04-01|
+--------------+
3. def current_timestamp(): Column
获取当前时间的时间戳
df.select(current_timestamp()).show()
+--------------------------------------+
| current_timestamp() |
+--------------------------------------+
|2020-04-01T09:40:03.051+08:00 |
|2020-04-01T09:40:03.051+08:00 |
|2020-04-01T09:40:03.051+08:00 |
+--------------------------------------+
4. def date_add(start: Column, days: Int): Column
在给定日期上增加days天,返回增加后的日期
df.select(date_add(col("start_time"),1)).show()
+-----------------------+
|date_add(start_time, 1)|
+-----------------------+
| 2019-01-11|
| 2019-01-02|
| 2019-01-10|
+-----------------------+
5. def date_format(dateExpr: Column, format: String): Column
将时间转化为某种格式的字符串
df.select(date_format(col("start_time"),"yyyy-MM-dd")).show()
+-----------------------------------+
|date_format(start_time, yyyy-MM-dd)|
+-----------------------------------+
| 2019-01-10|
| 2019-01-01|
| 2019-01-09|
+-----------------------------------+
6. def date_sub(start: Column, days: Int): Column
返回给定日期减去days天后的日期
df.select(date_sub(col("start_time"),1)).show()
+-----------------------+
|date_sub(start_time, 1)|
+-----------------------+
| 2019-01-09|
| 2018-12-31|
| 2019-01-08|
+-----------------------+
7. def date_trunc(format: String, timestamp: Column): Column
时间截取:按指定的单位截断时间戳,其余的日期部分设置为01,时分秒设置成00
date_trunc ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM", "DAY", "DD", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"] 参数格式
df.select(date_trunc("DAY",col("start_time"))).show()
+---------------------------+
|date_trunc(DAY, start_time)|
+---------------------------+
| 2019-01-10 00:00:00|
| 2019-01-01 00:00:00|
| 2019-01-09 00:00:00|
+---------------------------+
8. def datediff(end: Column, start: Column): Column
计算两个日期相差的天数(end 减去 start)
df.select(datediff(col("end_time"),col("start_time"))).show()
+------------------------------+
|datediff(end_time, start_time)|
+------------------------------+
| 112|
| 34|
| 52|
+------------------------------+
8. def dayofmonth(e: Column): Column
返回给定日期,在本月是第几天
df.select(dayofmonth(col("start_time"))).show()
+----------------------+
|dayofmonth(start_time)|
+----------------------+
| 10|
| 1|
| 9|
+----------------------+
9. def dayofweek(e: Column): Column
返回给定的时间在本周为第几天 ,dayofweek (1 = Sunday, 2 = Monday, ..., 7 = Saturday)
df.select(dayofweek(col("start_time"))).show()
+---------------------+
|dayofweek(start_time)|
+---------------------+
| 5|
| 3|
| 4|
+---------------------+
10. def dayofyear(e: Column): Column
返回给定的时间在本年为第几天
df.select(dayofyear(col("start_time"))).show()
+---------------------+
|dayofyear(start_time)|
+---------------------+
| 10|
| 1|
| 9|
+---------------------+
11. def from_unixtime(ut: Column, f: String): Column
将Unix时间戳(自1970-01-01 00:00:00 UTC起的秒数)转换为当前系统时区下、指定格式的时间字符串
val df = Seq(
("A", "2019-02-10", "0"),
("B", "2020-04-01", "0"),
("D", "2019-01-09", "0"))
.toDF("user_id", "start_time", "end_time").repartition(3)
df.select(from_unixtime(col("end_time"),"yyyy-MM-dd")).show()
+-----------------------------------+
|from_unixtime(end_time, yyyy-MM-dd)|
+-----------------------------------+
| 1970-01-01|
| 1970-01-01|
| 1970-01-01|
+-----------------------------------+
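12. def from_utc_timestamp(ts: Column, tz: String): Column
给定一个UTC时间(如 '2017-07-14 02:40:00.0'),将其转化为指定时区(time zone)的时间;to_utc_timestamp 则相反,把指定时区的时间转化为UTC时间。下面的示例演示的是 to_utc_timestamp(Asia/Seoul 为 UTC+9,因此结果比原时间早9小时):
df.select(to_utc_timestamp(col("start_time"),"Asia/Seoul")).show()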
+----------------------------------------+
|to_utc_timestamp(start_time, Asia/Seoul)|
+----------------------------------------+
| 2019-02-09 15:00:00|
| 2020-03-31 15:00:00|
| 2019-01-08 15:00:00|
+----------------------------------------+
13.def hour(e: Column): Column
取出时间中的小时
val df = Seq(
("A", "2019-02-10", "2019-02-10 00"),
("B", "2020-04-01", "2020-04-01 02"),
("D", "2019-01-09", "2019-01-09 10"))
.toDF("user_id", "start_time", "end_time")
df.select(hour(col("end_time"))).show()
+--------------+
|hour(end_time)|
+--------------+
| 0|
| 2|
| 10|
+--------------+
14. def last_day(e: Column): Column
返回日期所在月的最后一天,如2020-03-04,返回2020-03-31
df.select(last_day(col("end_time"))).show()
+------------------+
|last_day(end_time)|
+------------------+
| 2019-02-28|
| 2020-04-30|
| 2019-01-31|
+------------------+
15. def minute(e: Column): Column
提取给定时间中的分钟数
val df = Seq(
("A", "2019-02-10", "2019-02-10 00:34"),
("B", "2020-04-01", "2020-04-01 02:21"),
("D", "2019-01-09", "2019-01-09 10:54"))
.toDF("user_id", "start_time", "end_time")
df.select(minute(col("end_time"))).show()
+----------------+
|minute(end_time)|
+----------------+
| 34|
| 21|
| 54|
+----------------+
16. def month(e: Column): Column
从时间/时间戳/字符串中提取月份
df.select(month(col("end_time"))).show()
+---------------+
|month(end_time)|
+---------------+
| 2|
| 4|
| 1|
+---------------+
17. def months_between(date1: Column, date2: Column): Column
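返回两个日期之间相差的月数(date1 减去 date2)
val df = Seq(
("A", "2019-02-10", "2019-02-13"),
("B", "2020-04-01", "2020-04-05"),
("D", "2019-01-09", "2019-01-13"))
.toDF("user_id", "start_time", "end_time")
df.select(months_between(col("end_time"),col("start_time"))).show()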
+------------------------------------+
|months_between(end_time, start_time)|
+------------------------------------+
| 0.09677419|
| 0.12903226|
| 0.12903226|
+------------------------------------+
18. def next_day(date: Column, dayOfWeek: String): Column
给定一个日期列,返回晚于该日期的第一个指定星期几(dayOfWeek)的日期;若给定日期本身就是该星期几,则返回下一周的对应日期
accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
df.select(next_day(col("end_time"),"Sun")).show()
+-----------------------+
|next_day(end_time, Sun)|
+-----------------------+
| 2019-02-17|
| 2020-04-12|
| 2019-01-20|
+-----------------------+
19. def quarter(e: Column): Column
返回给定日期/时间戳/字符串所在的季度(取值范围1到4)
Examples:
df.select(quarter(col("end_time"))).show()
+-----------------+
|quarter(end_time)|
+-----------------+
| 1|
| 2|
| 1|
+-----------------+
21.def second(e: Column): Column
返回给定时间的秒
df.select(second(col("end_time"))).show()
+----------------+
|second(end_time)|
+----------------+
| 11|
| 32|
| 41|
+----------------+
22. def to_date(e: Column, fmt: String): Column
按指定的格式(fmt)将列解析并转化为日期(DateType)列
df.select(to_date(col("start_time"), "yyyy-MM-dd").as("time")).show()
+----------+
| time|
+----------+
|2019-02-10|
|2020-04-01|
|2019-01-09|
+----------+
23. def to_timestamp(s: Column, fmt: String): Column
按指定的格式(fmt)将列解析并转化为时间戳(TimestampType)列;不传fmt时按默认规则转换
df.select(to_timestamp(col("start_time"), "yyyy-MM-dd").as("time")).show()
+-------------------+
| time|
+-------------------+
|2019-02-10 00:00:00|
|2020-04-01 00:00:00|
|2019-01-09 00:00:00|
+-------------------+
df.select(to_timestamp(col("start_time")).as("time")).show()
+-------------------+
| time|
+-------------------+
|2019-02-10 00:00:00|
|2020-04-01 00:00:00|
|2019-01-09 00:00:00|
+-------------------+
24. def trunc(date: Column, format: String): Column
trunc截取某部分的日期,其他部分默认为01,第二个参数 ["year", "yyyy", "yy", "mon", "month", "mm"]
df.select( trunc(col("start_time"), "MM")).show()
+---------------------+
|trunc(start_time, MM)|
+---------------------+
| 2019-02-01|
| 2020-04-01|
| 2019-01-01|
+---------------------+
25. def year(e: Column): Column
提取给定时间的年份
df.select( year(col("start_time"))).show()
+----------------+
|year(start_time)|
+----------------+
| 2019|
| 2020|
| 2019|
+----------------+