// Read the source Avro data with the spark-avro package and convert the
// epoch-millisecond UPDATE_DT column into a timestamp.
val df_avro = spark.read.format("com.databricks.spark.avro").load("/data")
val df_avro1 = df_avro.withColumn("UPDATE_DT", (df_avro("UPDATE_DT") / 1000).cast("timestamp"))

// Write the converted data as Snappy-compressed ORC, then read it back.
df_avro1.repartition(1).write.mode("overwrite").format("orc").option("compression", "snappy").save("/data/tmp")
val df_orc = spark.read.format("orc").load("/data/tmp")
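As an added sanity check (not part of the original steps), one can confirm that the column type itself survives the round trip, so any discrepancy seen below is in the values rather than the schema:

// Added sanity check: UPDATE_DT should be of type timestamp in both frames.
df_avro1.printSchema()
df_orc.printSchema()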
// Query the same row before and after the ORC round trip; the timestamps
// differ in the read-back data.
df_avro1.select("*").where("ID = 1").show(10, false)
df_orc.select("*").where("ID = 1").show(10, false)
+--------+-----------------------+
|ID      |UPDATE_DT              |
+--------+-----------------------+
|1       |2017-01-07 21:35:24.538|
+--------+-----------------------+

+--------+-----------------------+
|ID      |UPDATE_DT              |
+--------+-----------------------+
|1       |2017-01-07 21:35:25.076|
+--------+-----------------------+
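To list every affected row rather than spot-checking a single ID, the two DataFrames can be joined and filtered for mismatching timestamps. A minimal sketch, assuming ID uniquely identifies a row (column names are taken from the output above; outside the spark-shell you would also need import spark.implicits._ for the $ syntax):

// Minimal sketch: list all rows whose UPDATE_DT changed in the ORC round
// trip. Assumes ID is a unique key.
val mismatches = df_avro1.alias("a")
  .join(df_orc.alias("o"), Seq("ID"))
  .where($"a.UPDATE_DT" =!= $"o.UPDATE_DT")
  .select($"ID", $"a.UPDATE_DT".alias("AVRO_TS"), $"o.UPDATE_DT".alias("ORC_TS"))
mismatches.show(10, false)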