We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a3c5063 commit 3082b27Copy full SHA for 3082b27
online/src/main/scala/ai/chronon/online/CatalystUtil.scala
@@ -63,6 +63,10 @@ object CatalystUtil {
63
.config("spark.sql.adaptive.enabled", "false")
64
.config("spark.sql.legacy.timeParserPolicy", "LEGACY")
65
.config("spark.ui.enabled", "false")
66
+ // The default column reader batch size is 4096 - Spark reads that many rows into a memory buffer at once.
67
+ // That causes OOMs on large columns.
68
+ // For derivations we only need to read one row at a time.
69
+ // For interactive use we set the limit to 16.
70
.config("spark.sql.parquet.columnarReaderBatchSize", "16")
71
.enableHiveSupport() // needed to support registering Hive UDFs via CREATE FUNCTION.. calls
72
.getOrCreate()
0 commit comments