From 969dad87703f01d45d8410ce5e4ae9160cfa81ca Mon Sep 17 00:00:00 2001 From: Sakthi Date: Thu, 20 Mar 2025 01:39:25 -0700 Subject: [PATCH 1/4] [SPARK-51420][SQL] Get minutes of TIME datatype --- .../catalyst/analysis/FunctionRegistry.scala | 2 +- .../expressions/timeExpressions.scala | 54 ++++++++++++++++++- .../sql/catalyst/util/DateTimeUtils.scala | 7 +++ .../expressions/TimeExpressionsSuite.scala | 30 ++++++++++- 4 files changed, 89 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index ec9060f8cb8c2..9db4ec2cbbdb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -642,7 +642,7 @@ object FunctionRegistry { expression[FromUTCTimestamp]("from_utc_timestamp"), expression[Hour]("hour"), expression[LastDay]("last_day"), - expression[Minute]("minute"), + expressionBuilder("minute", MinuteExpressionBuilder), expression[Month]("month"), expression[MonthsBetween]("months_between"), expression[NextDay]("next_day"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala index d02cd7d725ce5..0ae3e485c9f20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala @@ -20,11 +20,12 @@ package org.apache.spark.sql.catalyst.expressions import java.time.DateTimeException import org.apache.spark.sql.catalyst.analysis.ExpressionBuilder -import org.apache.spark.sql.catalyst.expressions.objects.Invoke +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.TimeFormatter import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.types.StringTypeWithCollation -import org.apache.spark.sql.types.{AbstractDataType, ObjectType, TimeType} +import org.apache.spark.sql.types.{AbstractDataType, IntegerType, ObjectType, TimeType} import org.apache.spark.unsafe.types.UTF8String /** @@ -160,3 +161,52 @@ object TryToTimeExpressionBuilder extends ExpressionBuilder { } } } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(time_expr) - Returns the minute component of the given time. + """, + examples = """ + Examples: + > SELECT _FUNC_(TIME'23:59:59.999999'); + 59 + """, + since = "4.1.0", + group = "datetime_funcs") +// scalastyle:on line.size.limit +case class MinutesOfTime(child: Expression) + extends RuntimeReplaceable + with ExpectsInputTypes { + + override def replacement: Expression = StaticInvoke( + classOf[DateTimeUtils.type], + IntegerType, + "getMinutesOfTime", + Seq(child), + Seq(child.dataType) + ) + + override def inputTypes: Seq[AbstractDataType] = Seq(TimeType()) + + override def children: Seq[Expression] = Seq(child) + + override def prettyName: String = "minute" + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + copy(child = newChildren.head) + } +} + +object MinuteExpressionBuilder extends ExpressionBuilder { + override def build(name: String, expressions: Seq[Expression]): Expression = { + val child = expressions.head + child.dataType match { + case _: TimeType => + MinutesOfTime(child) + case _ => + Minute(child) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 1f741169898e9..01c4abdde00c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -113,6 +113,13 @@ object DateTimeUtils extends SparkDateTimeUtils { getLocalDateTime(micros, zoneId).getMinute } + /** + * Returns the minute value of a given TIME (TimeType) value. + */ + def getMinutesOfTime(micros: Long): Int = { + microsToLocalTime(micros).getMinute + } + /** * Returns the second value of a given timestamp value. The timestamp is expressed in * microseconds since the epoch. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala index 422e9cbcda038..067b87600e910 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SparkDateTimeException, SparkFunSuite} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ -import org.apache.spark.sql.types.StringType +import org.apache.spark.sql.types.{StringType, TimeType} class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("ParseToTime") { @@ -51,4 +51,32 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { condition = "CANNOT_PARSE_TIME", parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'")) } + + test("Minute with TIME type") { + // A few test times in microseconds since midnight: + // time in microseconds -> expected minute + val testTimes = Seq( + localTime() -> 0, + localTime(1) -> 0, + localTime(0, 59) -> 59, + localTime(14, 30) -> 30, + localTime(12, 58, 59) -> 58, + localTime(23, 0, 1) -> 0, + localTime(23, 59, 59, 999999) -> 59 + ) + + // Create a literal with TimeType() for each test microsecond value + // evaluate MinutesOfTime(...), and check that the result matches the expected minute. + testTimes.foreach { case (micros, expectedMinute) => + checkEvaluation( + MinutesOfTime(Literal(micros, TimeType())), + expectedMinute) + } + + // Verify NULL handling + checkEvaluation( + MinutesOfTime(Literal.create(null, TimeType(TimeType.MICROS_PRECISION))), + null + ) + } } From 9248370a316b94d3a741521bf0b54b872b6a4a93 Mon Sep 17 00:00:00 2001 From: Sakthi Date: Thu, 20 Mar 2025 04:28:31 -0700 Subject: [PATCH 2/4] Moving function usage/desc/example to expressionbuilder --- .../sql/catalyst/expressions/timeExpressions.scala | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala index 0ae3e485c9f20..96acf40dbfe65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala @@ -199,6 +199,19 @@ case class MinutesOfTime(child: Expression) } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(time_expr) - Returns the minute component of the given time. + """, + examples = """ + Examples: + > SELECT _FUNC_(TIME'23:59:59.999999'); + 59 + """, + since = "4.1.0", + group = "datetime_funcs") +// scalastyle:on line.size.limit object MinuteExpressionBuilder extends ExpressionBuilder { override def build(name: String, expressions: Seq[Expression]): Expression = { val child = expressions.head From e0314492b329ed2d2c70326cee140dbe320f7625 Mon Sep 17 00:00:00 2001 From: Sakthi Date: Fri, 21 Mar 2025 22:17:23 -0700 Subject: [PATCH 3/4] Handle empty expressions list for minute expression builder --- .../expressions/timeExpressions.scala | 26 +++++++++++----- .../expressions/TimeExpressionsSuite.scala | 30 ++++++++++++++++++- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala index 96acf40dbfe65..9fbb8401cdf0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala @@ -202,24 +202,34 @@ case class MinutesOfTime(child: Expression) // scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_(time_expr) - Returns the minute component of the given time. + _FUNC_(expr) - Returns the minute component of the given expression. + + If `expr` is a TIMESTAMP or a string that can be cast to timestamp, + it returns the minute of that timestamp. + If `expr` is a TIME type (since 4.1.0), it returns the minute of the time-of-day. """, examples = """ Examples: + > SELECT _FUNC_('2009-07-30 12:58:59'); + 58 > SELECT _FUNC_(TIME'23:59:59.999999'); 59 """, - since = "4.1.0", + since = "1.5.0", group = "datetime_funcs") // scalastyle:on line.size.limit object MinuteExpressionBuilder extends ExpressionBuilder { override def build(name: String, expressions: Seq[Expression]): Expression = { - val child = expressions.head - child.dataType match { - case _: TimeType => - MinutesOfTime(child) - case _ => - Minute(child) + if (expressions.isEmpty) { + throw QueryCompilationErrors.wrongNumArgsError(name, Seq("> 0"), expressions.length) + } else { + val child = expressions.head + child.dataType match { + case _: TimeType => + MinutesOfTime(child) + case _ => + Minute(child) + } } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala index 067b87600e910..8311418a96c61 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.{SparkDateTimeException, SparkFunSuite} +import org.apache.spark.{SPARK_DOC_ROOT, SparkDateTimeException, SparkFunSuite} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.types.{StringType, TimeType} @@ -52,6 +53,33 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'")) } + test("MinuteExpressionBuilder") { + // Empty expressions list + checkError( + exception = intercept[AnalysisException] { + MinuteExpressionBuilder.build("minute", Seq.empty) + }, + condition = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + parameters = Map( + "functionName" -> "`minute`", + "expectedNum" -> "> 0", + "actualNum" -> "0", + "docroot" -> SPARK_DOC_ROOT) + ) + + // test TIME-typed child should build MinutesOfTime + val timeExpr = Literal(localTime(12, 58, 59), TimeType()) + val builtExprForTime = MinuteExpressionBuilder.build("minute", Seq(timeExpr)) + assert(builtExprForTime.isInstanceOf[MinutesOfTime]) + assert(builtExprForTime.asInstanceOf[MinutesOfTime].child eq timeExpr) + + // test non TIME-typed child should build MinutesOfTime + val tsExpr = Literal("2009-07-30 12:58:59") + val builtExprForTs = MinuteExpressionBuilder.build("minute", Seq(tsExpr)) + assert(builtExprForTs.isInstanceOf[Minute]) + assert(builtExprForTs.asInstanceOf[Minute].child eq tsExpr) + } + test("Minute with TIME type") { // A few test times in microseconds since midnight: // time in microseconds -> expected minute From 1f875251b44b46a8a46e371a16775bbabff1cb5e Mon Sep 17 00:00:00 2001 From: Sakthi Date: Sat, 22 Mar 2025 00:34:51 -0700 Subject: [PATCH 4/4] Fix comment on the MinuteExpressionBuilder test --- .../spark/sql/catalyst/expressions/TimeExpressionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala index 8311418a96c61..070da53d963d7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala @@ -73,7 +73,7 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { assert(builtExprForTime.isInstanceOf[MinutesOfTime]) assert(builtExprForTime.asInstanceOf[MinutesOfTime].child eq timeExpr) - // test non TIME-typed child should build MinutesOfTime + // test non TIME-typed child should build Minute val tsExpr = Literal("2009-07-30 12:58:59") val builtExprForTs = MinuteExpressionBuilder.build("minute", Seq(tsExpr)) assert(builtExprForTs.isInstanceOf[Minute])