-
Notifications
You must be signed in to change notification settings - Fork 835
Description
Here's my code: I create a new DataFrame from the result of a left join between two other DataFrames, and then try to convert it to a DynamicFrame.
# Read the already-loaded hash keys from Snowflake.
# NOTE(review): "autopushdown" is disabled deliberately. The stack trace below
# fails with a NoSuchMethodError inside the connector's pushdown query
# generation (net.snowflake.spark.snowflake.pushdowns.querygeneration.*),
# which is the signature of a Spark / spark-snowflake connector version
# mismatch. Turning pushdown off keeps the connector from building the query
# plan through that incompatible code path; the proper long-term fix is to use
# a spark-snowflake connector artifact built for this Spark version.
dfs = sqlContext.read.format(SNOWFLAKE_SOURCE_NAME) \
    .options(**sfOptions) \
    .option("autopushdown", "off") \
    .option("query", "SELECT hashkey as hash From randomtable") \
    .load()
#Source: full table from the Glue catalog
datasource0 = glueContext.create_dynamic_frame.from_catalog(database = "test", table_name = "randomtable", transformation_ctx = "datasource0")
#add hash value: sha2 over all columns joined with "||", so identical rows get identical keys
df = datasource0.toDF()
df.cache()  # df feeds the hash, dedupe and join below; caching avoids re-reading the source
df = df.withColumn("hashkey", sha2(concat_ws("||", *df.columns), 256))
#drop dupes within the incoming batch
df1 = df.dropDuplicates(subset=['hashkey'])
#read incremental data: manual left-anti-join — after the left join, rows whose
#Snowflake-side "hash" is null are the ones not yet present in Snowflake
inc = df1.join(dfs, df1["hashkey"] == dfs["hash"], how='left').filter(col('hash').isNull())
#convert it back to glue context
datasource1 = DynamicFrame.fromDF(inc, glueContext, "datasource1")
Here is the error I get when trying to convert a data frame to a dynamic frame.
datasource1 = DynamicFrame.fromDF(inc, glueContext, "datasource1")
File
"/mnt/yarn/usercache/root/appcache/application_1560272525947_0002/container_1560272525947_0002_01_000001/PyGlue.zip/awsglue/dynamicframe.py",line 150, in fromDF
File "/mnt/yarn/usercache/root/appcache/application_1560272525947_0002/container_1560272525947_0002_01_000001/py4j-0.10.4-src.zip/py4j/java_gateway.py",
line 1133, in __call__
File "/mnt/yarn/usercache/root/appcache/application_1560272525947_0002/container_1560272525947_0002_01_000001/pyspark.zip/pyspark/sql/utils.py",
line 63, in deco
File "/mnt/yarn/usercache/root/appcache/application_1560272525947_0002/container_1560272525947_0002_01_000001/py4j-0.10.4-src.zip/py4j/protocol.py",
line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:com.amazonaws.services.glue.DynamicFrame.apply.
: java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.expressions.AttributeReference.&lt;init&gt;(Ljava/lang/String;Lorg/apache/spark/sql/types/DataType;ZLorg/apache/spark/sql/types/Metadata;Lorg/apache/spark/sql/catalyst/expressions/ExprId;Lscala/collection/Seq;)V
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryHelper$$anonfun$8.apply(QueryHelper.scala:66)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryHelper$$anonfun$8.apply(QueryHelper.scala:65)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryHelper.&lt;init&gt;(QueryHelper.scala:64)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.SourceQuery.&lt;init&gt;(SnowflakeQuery.scala:100)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder.net$snowflake$spark$snowflake$pushdowns$querygeneration$QueryBuilder$$generateQueries(QueryBuilder.scala:98)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder.liftedTree1$1(QueryBuilder.scala:63)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder.treeRoot$lzycompute(QueryBuilder.scala:61)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder.treeRoot(QueryBuilder.scala:60)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder.tryBuild$lzycompute(QueryBuilder.scala:34)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder.tryBuild(QueryBuilder.scala:33)
at net.snowflake.spark.snowflake.pushdowns.querygeneration.QueryBuilder$.getRDDFromPlan(QueryBuilder.scala:179)
at net.snowflake.spark.snowflake.pushdowns.SnowflakeStrategy.buildQueryRDD(SnowflakeStrategy.scala:42)
at net.snowflake.spark.snowflake.pushdowns.SnowflakeStrategy.apply(SnowflakeStrategy.scala:24)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:62)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:62)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
Any help is greatly appreciated.