diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index da80e629ee31d..725e54d3666fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.catalyst.util import java.text.SimpleDateFormat -import java.time.{LocalDate, ZoneId} +import java.time.{DateTimeException, LocalDate, ZoneId} import java.util.{Date, Locale} - import org.apache.commons.lang3.time.FastDateFormat - import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._ @@ -84,6 +83,23 @@ class Iso8601DateFormatter( } } +class PartitionDateFormatter(zoneId: ZoneId) extends Iso8601DateFormatter( + DateFormatter.defaultPattern, + zoneId, + DateFormatter.defaultLocale, + LENIENT_SIMPLE_DATE_FORMAT, + isParsing = false) { + + override def parse(s: String): Int = { + convertSpecialDate(s.trim, zoneId) match { + case Some(_) => + throw new DateTimeException( + s"$s is a special date which is not valid as a partition date") + case None => super.parse(s) + } + } +} + trait LegacyDateFormatter extends DateFormatter { def parseToDate(s: String): Date @@ -196,4 +212,12 @@ object DateFormatter { def apply(zoneId: ZoneId): DateFormatter = { getFormatter(None, zoneId, isParsing = false) } + + def getPartitioningFormatter(zoneId: ZoneId): DateFormatter = { + if (SQLConf.get.legacyTimeParserPolicy == LEGACY) { + getLegacyFormatter(defaultPattern, zoneId, defaultLocale, LENIENT_SIMPLE_DATE_FORMAT) + } else { + new PartitionDateFormatter(zoneId) + } + } } diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 6832d1a9954fb..69a39d296e2a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -165,6 +165,24 @@ class FractionTimestampFormatter(zoneId: ZoneId) } } + +class PartitionTimestampFormatter(zoneId: ZoneId) extends Iso8601TimestampFormatter( + TimestampFormatter.partitionPattern, + zoneId, + TimestampFormatter.defaultLocale, + isParsing = true) { + + override def parse(s: String): Long = { + convertSpecialTimestamp(s.trim, zoneId) match { + case Some(_) => + throw new DateTimeException( + s"$s is a special timestamp which is not valid as a partition timestamp") + case None => super.parse(s) + } + } +} + + /** * The custom sub-class of `GregorianCalendar` is needed to get access to * protected `fields` immediately after parsing. 
We cannot use @@ -284,6 +302,8 @@ object TimestampFormatter { def defaultPattern(): String = s"${DateFormatter.defaultPattern} HH:mm:ss" + def partitionPattern(): String = "yyyy-MM-dd HH:mm:ss[.S]" + private def getFormatter( format: Option[String], zoneId: ZoneId, @@ -347,4 +367,12 @@ object TimestampFormatter { def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = { new FractionTimestampFormatter(zoneId) } + + def getPartitioningFormatter(zoneId: ZoneId): TimestampFormatter = { + if (SQLConf.get.legacyTimeParserPolicy == LEGACY) { + getLegacyFormatter(partitionPattern, zoneId, defaultLocale, LENIENT_SIMPLE_DATE_FORMAT) + } else { + new PartitionTimestampFormatter(zoneId) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 69123ee7af5b9..f0c557af4c063 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -130,11 +130,9 @@ object PartitioningUtils { Map.empty[String, String] } - val dateFormatter = DateFormatter(zoneId) - val timestampFormatter = TimestampFormatter( - timestampPartitionPattern, - zoneId, - isParsing = true) + val dateFormatter = DateFormatter.getPartitioningFormatter(zoneId) + val timestampFormatter = TimestampFormatter.getPartitioningFormatter(zoneId) + // First, we need to parse every partition's path and see if we can find partition values. val (partitionValues, optDiscoveredBasePaths) = paths.map { path => parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes,