Всем привет! Спасибо за помощь с таймзонами!
В итоге сделал так:
1) DIM_TIMEZONE
/**
  * One row of the DIM_TIMEZONE dimension.
  *
  * Field names are intentionally snake_case: they become the column names when the
  * sequence of rows is converted with `toDF`.
  *
  * @param tz_key       integer key of the time zone (sample rows use 1, 2, 3, ...)
  * @param tz_code      zone identifier; it is passed to `ZoneId.of` when the date-time
  *                     dimension is built, so it must be an ID that `java.time` accepts
  * @param tz_name      human-readable name of the zone
  * @param tz_utc_shift UTC shift as text, e.g. "+0", "+1", "+3"
  */
final case class DimTimeZoneRow(
  tz_key: Int,
  tz_code: String,
  tz_name: String,
  tz_utc_shift: String
)
// Static contents of the DIM_TIMEZONE dimension: one entry per supported time zone.
// tz_code is later passed to ZoneId.of, so every code must be an ID that java.time accepts.
// NOTE(review): tz_utc_shift is a fixed string; for zones that observe DST (e.g. CET) the
// actual offset changes during the year — confirm this column is informational only.
val dimTimeZone: Seq[DimTimeZoneRow] = List(
DimTimeZoneRow(1, "UTC", "Coordinated Universal Time", "+0"),
DimTimeZoneRow(2, "CET", "Central European Time", "+1"),
DimTimeZoneRow(3, "Europe/Moscow", "Moscow Standard Time", "+3"),
// The remaining time zones are elided in the original post.
...
)
val dimTimeZoneDF = dimTimeZone.toDF

2) DIM_DATETIMEZONE
/**
  * One row of the DIM_DATETIMEZONE dimension.
  *
  * The builder in this file creates one record per (UTC hour, time zone) pair.
  * `date_key` and `hour_key` are filled from the UTC date-hour, and `tz_fkey` identifies
  * the time zone. The remaining attributes are presumably calendar fields of that hour
  * expressed in the given zone; their computation is not shown here, so confirm it
  * against the builder.
  *
  * Field names are intentionally snake_case: they become the column names when the
  * sequence of records is converted with `toDF`.
  */
final case class DimDateTimeZoneRec(
  // Key columns: UTC date, UTC hour and the time-zone key.
  date_key: Int,
  hour_key: Int,
  tz_fkey: Int,
  // Textual date-time / date representations.
  datetime_iso: String,
  date_iso: String,
  // Year and month attributes.
  year: Int,
  month_name: String,
  month_num_of_year: Int,
  month_num_with_year: Int,
  month_name_with_year: String,
  // Week attributes.
  week_num_of_year: Int,
  week_num_with_year: String,
  // Day attributes.
  day_num_of_year: Int,
  day_num_of_month: Int,
  day_num_of_week: Int,
  day_name_of_week: String,
  is_weekend: Boolean,
  // NOTE(review): "hum" looks like a typo for "num"; the name is kept because it is the
  // published column name — rename only together with all consumers.
  hour_hum_of_day: Int,
  time_of_day: String
)
// Builds the DIM_DATETIMEZONE rows: for every UTC date-hour in dateHoursUTC, one record is
// produced for every time zone in dimTimeZone (cross product via flatMap + map).
val dimDateTimeZone: IndexedSeq[DimDateTimeZoneRec] = dateHoursUTC.flatMap(dateHourUTC => {
dimTimeZone.map(tz => {
// Same instant as dateHourUTC, expressed in the zone identified by tz.tz_code.
val dateHour = dateHourUTC.withZoneSameInstant(ZoneId.of(tz.tz_code))
// Both keys are derived from the UTC value, not from the zone-local dateHour, so they are
// identical for all time zones of the same instant.
// NOTE(review): .toInt assumes both formatters emit digits only — confirm their patterns.
val dateKeyUTC = dayKeyFormatter.format(dateHourUTC).toInt
val hourKeyUTC = hourKeyFormatter.format(dateHourUTC).toInt
// Elided in the original post; timezoneFKey and the other attributes are defined here.
...
DimDateTimeZoneRec(
dateKeyUTC,
hourKeyUTC,
timezoneFKey,
// Remaining constructor arguments are elided in the original post.
...
)
})
})
val dimDateTimeZoneDF = dimDateTimeZone.toDF

3) В фактовых табличках использую партиционирование по "date_utc", "hour_utc".
Они же являются внешними ключами на DIM_DATETIMEZONE,
поэтому Dynamic Partition Pruning в Spark 3+ работает!