diff --git a/flink/src/test/scala/org/apache/spark/sql/avro/AvroDeSerializationSupportSpec.scala b/flink/src/test/scala/org/apache/spark/sql/avro/AvroDeSerializationSupportSpec.scala index 35e4cda119..2190d59961 100644 --- a/flink/src/test/scala/org/apache/spark/sql/avro/AvroDeSerializationSupportSpec.scala +++ b/flink/src/test/scala/org/apache/spark/sql/avro/AvroDeSerializationSupportSpec.scala @@ -1,13 +1,17 @@ package org.apache.spark.sql.avro +import ai.chronon.api.{StructType => ChrononStructType} import ai.chronon.flink.test.UserAvroSchema -import ai.chronon.online.AvroCodec +import ai.chronon.online.{AvroCodec, AvroConversions, CatalystUtil} +import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.flink.api.common.serialization.DeserializationSchema import org.apache.flink.api.common.serialization.SerializationSchema import org.apache.flink.metrics.groups.UnregisteredMetricsGroup import org.apache.flink.util.SimpleUserCodeClassLoader import org.apache.flink.util.UserCodeClassLoader +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.scalatest.flatspec.AnyFlatSpec import scala.collection.JavaConverters._ @@ -66,6 +70,37 @@ class AvroDeSerializationSupportSpec extends AnyFlatSpec { val row = deserSchema.deserialize(recordBytes) assert(row == null) } + + // This test straddles Avro deser and Catalyst Util functionality. We locate this here as we don't have a + // dependency on the flink module in CU (online). 
This test sanity checks large schemas like beacon top that have + // a few hundred fields and confirms that we are able to run the catalyst expression through them without issues + it should "successfully deser large avro schemas" in { + + val beaconTopSchema = + "{\"type\":\"record\",\"name\":\"BeaconTop\",\"namespace\":\"com.etsy\",\"fields\":[{\"name\":\"event_name\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"long\"},{\"name\":\"browser_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"primary_event\",\"type\":\"boolean\"},{\"name\":\"guid\",\"type\":\"string\"},{\"name\":\"page_guid\",\"type\":\"string\"},{\"name\":\"event_logger\",\"type\":\"string\"},{\"name\":\"event_source\",\"type\":\"string\"},{\"name\":\"ip\",\"type\":\"string\"},{\"name\":\"user_agent\",\"type\":\"string\"},{\"name\":\"loc\",\"type\":\"string\"},{\"name\":\"ref\",\"type\":\"string\"},{\"name\":\"cookies\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}],\"default\":null},{\"name\":\"ab\",\"type\":[\"null\",{\"type\":\"map\",\"values\":{\"type\":\"array\",\"items\":\"string\"}}],\"default\":null},{\"name\":\"user_id\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"isMobileRequest\",\"type\":[\"null\",\"boolean\"],\"default\":null},{\"name\":\"isMobileDevice\",\"type\":[\"null\",\"boolean\"],\"default\":null},{\"name\":\"isMobileTemplate\",\"type\":[\"null\",\"boolean\"],\"default\":null},{\"name\":\"detected_currency_code\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"detected_language\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"detected_region\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_ids\",\"type\":[\"null\",{\"type\":\"array\",\"items\":\"long\"}],\"default\":null},{\"name\":\"event_timestamp\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"properties\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}],\"default\":null},{\"name\":\"properties_top\"
,\"type\":{\"type\":\"record\",\"name\":\"BeaconTopProperties\",\"fields\":[{\"name\":\"ab_uri\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"accept_languages\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"accept_language\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"aco\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"action_payload\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"active_ads_experiments\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"active_experiment_flag_variant\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"active_listings\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"activity_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ad_organic_order\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"additional_information\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"advertising_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"airgap_url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"answer_set\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"apiKey\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_foreground_time\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_in_background\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_initial_start_time\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_start_time\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_state\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_version\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"app_version_code\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"apple_pay_info\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"applied_attribute_facets\",\"type\
":[\"null\",\"string\"],\"default\":null},{\"name\":\"arm_scores\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"asfl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"async_neu_specs_route\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"atc_buttons_shown\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"atf_placeholders\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"attempted_send_beacon\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"attribute_coverage_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"attribute_facets\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"attributed_engagement_event\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"attributes\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"badge_lc\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"badge_lc_ranker_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bandit_attribution\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"below_fold_modules\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bidMultiplier\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_active_experiment_flag\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_cap_reasons\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_constraints\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_increase\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_strategy\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_valuation\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_valuation_expected_profit\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_valuation_expected_seller_revenue\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_va
luation_initial_bid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"bid_valuation_transformed_pred_cvr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"breakpoint\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"buyer_features\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"candidateSourceScores\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"candidate_signal_identifiers\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"candidate_source_map\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cappedBid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cart_amounts\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cart_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cart_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cart_listing_availability\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cart_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cart_listing_keys\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"categorized_images\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"category_input\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cdn_provider\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"child_cart_subtotals\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"client_provided_features\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"collection_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"content_source\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"content_source_uid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"content_sources\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"context\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"context_name\",\"type\":[\"null\",\"string\"],\"default\":nu
ll},{\"name\":\"contextual_info\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cost\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"coupon_code\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"cropped_listing_card_image\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"csrScore\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"current_tags\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"customized_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"datasets\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"datasets_map\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"dd_referrer\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"details\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_config_client_request_ts\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_config_client_response_ts\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_config_server_ts\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_resolution\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_system_name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"device_system_version\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"disc_lc\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"discount_pct\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"discovery_theme\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"display_loc\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"display_price\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"display_price_usd\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"distance\",\"type\":
[\"null\",\"string\"],\"default\":null},{\"name\":\"download_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ebid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"eligible_signal_content\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"eligible_signal_count\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"eligible_signal_identifiers\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"embeddingsSimilarity\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"endpoint_class\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"error\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"error_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"error_text\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"errors\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"etsy_generated_collections_details\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"etsy_transit_times_metadata\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"etsy_transit_times_source_metadata\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"etsy_url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"examples\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"experiment_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"experiments\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"fbis\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"fbp\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"finalScore\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"first\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"franz_host\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"from_page\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"fs_lc\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"fsb\",
\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"full_uri\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ga_client_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"gdpr_p\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"gdpr_tp\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"generated_tags_count\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"gift_idea_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"giftiness_score\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"group_1\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"group_2\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"group_3\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"group_4\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"grouped_cart_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"hardware_manufacturer\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"hardware_model\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"hardware_platform\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"hardware_platform_string\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"has_generated_tags\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"header_fingerprint\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"header_signature\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"image_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"images\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"inferred_context\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"inputValue\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"interaction_next_paint_element\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ios_advertising_identifier\",\"type\":[\"null\",\"string\"],\"default\":null},{\
"name\":\"ip_org\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isAutoBid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isBot\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isChromeInstantRequest\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isEtsyApp\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isInWebView\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isInternal\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isMobileRequestIgnoreCookie\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isMobileSupported\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isMozPrefetchRequest\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isPreviewRequest\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isSupportLogin\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isSyntheticTest\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isTabletSupported\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isTestAccount\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isTouch\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isWhiteListedMobileDevice\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_OSA_landing\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_bid_boosted\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_collected\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_domestic\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_faved\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_in_cart\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_prefetch\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_shop_fav\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"is_surfaced\",\"type\":[\"null\",\"string\"],\"default\"
:null},{\"name\":\"keep_shopping_link\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"largest_contentful_paint_element\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"layout_shift_elements\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"link\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listingPrice\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_attribution_keys\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_availability\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_decorators\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_destinations\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_impression_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_original_creation_dates\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_price_usd\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_prices\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_prices_usd\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_quantities\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_state\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_titles\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listing_totals\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listings_delivered\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listings_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listings_suppressed\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"listings_taxonomy_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"liveBudgetRemaining\",\
"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"load_strategy\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"loaf_entries\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"local_signal\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"locale_currency_code\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"logging_class\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"logging_key\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"mail_classes\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"marketplace\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"maxCPC\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"median_listing_price_usd\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"message\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"message_to_seller\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"metadata\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"mmxVariant\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"mmx_behavior\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"mmx_request_uuid_map\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"model_inputs_2\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"model_inputs_3\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"model_inputs_4\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"module_details\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"module_placement\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"module_placements\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"modules\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"msg\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"network_type\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"nfyfs\",\"type\":[\"n
ull\",\"string\"],\"default\":null},{\"name\":\"nfyss\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"nonce\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"npfyfs\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"observer_types\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ofl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"on_screen_modules\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"orientation\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"originalScore\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"other_resolution\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ott_buyer_filters_shown\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"output\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"pace\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"page_listing_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"page_number\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"page_or_screen\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"page_time\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"page_type\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"parent_context_name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"parent_inferred_context\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"parent_page_guid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"pathways_formatted\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"pathways_json\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"persona_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"personalization\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"personalized_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"placement\",\"type\":[\"null\",\
"string\"],\"default\":null},{\"name\":\"platformObjectiveScore\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"policy\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"postBoostedBid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"post_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"preBoostedBid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"predCtr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"predCvr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"pref_language\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prefix\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"price\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"primary_shop_language\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prolistExpectedRPC\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prolistStaticCVR\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prolistStaticCalibratedCVR\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prolist_value\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"psfcl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"psffl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"psffs\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"pssfl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"purchase_category\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"q4_detail\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"quality\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"quality_eff\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"queries\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"queries_to_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"query\",\"type\":[\"null\",\"string\"],\"default\":null},{\"nam
e\":\"queryTimes\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"query_suggestions\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"query_taxo_path\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"random\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ranking\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"redirected_query\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"refTag\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"referer\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"referring_listing_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"region\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"region_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"region_identifier\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"regions_displayed\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"related_terms\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"relevancy\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"report_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"request_uri\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"request_url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"request_uuid\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"requested_lang\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"results\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"reviewText\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"reward_set_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"rfc\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"rlhlcra\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"rptyfl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"rsh\",\"type\":[\"null\",\"string\"],\"de
fault\":null},{\"name\":\"rss\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"scores\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"scores_from_solr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"search_query\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"second_pass_ranker_map\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"security_accept_language\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"security_attempted_url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"security_full_header_list\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"security_referer\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"security_uaid_cookie_value\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"security_useragent\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"selected_interests\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"sellerObjectiveScore\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"sellerObjectiveWeight\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"serve_time\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"server_timestamp_offset\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ship_by_date_origin\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"shipping_days_origin\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"shop_country\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"shop_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"shop_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"shop_shop_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"shop_user_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"signal_identifier\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"slugs\",\"type\":[\"null\",\"string\"],\"default\
":null},{\"name\":\"sold_listing_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"sold_product_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"solr_count\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"special_ad_treatments\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"special_ad_treatments_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"stack_trace\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"suggested_search_queries\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"summary_stats\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tag_diff\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tags_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"target_listing_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"targets\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"taxo_paths\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"taxonomy_facet_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"taxonomy_id\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"taxonomy_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"taxonomy_path\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"taxonomy_paths\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"term\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tfServingEnabled\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"time_zone\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tippers\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"title_diff\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"total_unseen\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"transaction_file_ids\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"transaction_ids\",\"type\":[\"null\",\"string
\"],\"default\":null},{\"name\":\"translated_query\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tvbEnabled\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tvbScaledRoas\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"tyc\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"unique_listing_identifiers\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"urlRef\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"use_multi_objective_ranking\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"utm_campaign\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"value\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"values\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"version\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"web_url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"webkit_page_visibility\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"zd_user_data\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"zendesk_referrer\",\"type\":[\"null\",\"string\"],\"default\":null}]}}]}" + val beaconTopPayload = + 
"Jmxpc3RpbmdfaW1hZ2Vfc3dpcGWi5NzBn2UCODNEMTAxNEIxRjAzRDQxMjJBMzVCQzkwNEU0MTYASEExQUEyNEI2LTRFQzMtNDZGMS1BRTZDLTc3NzdGQzQ5QUE4OUhERkUzQjI0QS01MjI0LTRDQzktQkY1NC1DNzhDOEQ1Q0EyQ0QMbmF0aXZlBmlvcxo2OC4yMjYuMTQzLjMwlAJNb3ppbGxhLzUuMCAoaVBob25lOyBDUFUgaVBob25lIE9TIDE4XzFfMSBsaWtlIE1hYyBPUyBYKSBBcHBsZVdlYktpdC82MDUuMS4xNSAoS0hUTUwsIGxpa2UgR2Vja28pIE1vYmlsZS8xNUUxNDggRXRzeUluYy83LjEyIHJ2OjcxMjAwLjgwLjAA2gJldHN5Oi8vc2NyZWVuL2Jyb3dzZWxpc3RpbmdzP3JlZj1wYWdlLTIqbG9jYXRpb24tMTEqaXRlbXNfcGVyX3BhZ2UtMzYqcXVlcnktcnVzdGljJTIwd2VkZGluZyUyMGNha2UlMjBjdXR0ZXIqaXNfYWQtMCpjc2x1Zy00ZGY0ZDE0MGM2OThjZWY0ZTg3NDAwZmFkMjc3MGE2NTAzN2E5MjQwOjY1NTQwMjE4MgIEBmZ2ZRgxNzM5MDUyOTgyLjAcZXRhbGFfb3ZlcnJpZGVMMC4zRDEwMTRCMUYwM0Q0MTIyQTM1QkM5MDRFNDE2LjAuMC4wLjAAAAK6pYMlAgACAAIAAAAAAAL+x9vBn2UCBBhhY3RpdmVfaW5kZXgCMRRudW1faW1hZ2VzBDExAAACCmVuLVVTAAAAAAAAAAAAAAAAAAIkMTczOTM5NTQ3OC40Mjc4MjY5AAIiMTY0NjYxNDEyNy4wMjk1MDECDkV0c3lJbmMCIjE2NDY2MTQxMjcuMDI5NTAxAgxhY3RpdmUCJDcuMTIgcnY6NzEyMDAuODAuMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIYdmlld19saXN0aW5nAAAAAAAAAAAAAAAAAiQxNzM5Mzk1NDc4LjQwNzc2MjECJDE3MzkzOTU0NzguNjc2MTEyMgIUMTczOTM5NTQ3OAAAAgZpT1MCDDE4LjEuMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjRmcmFuei1wcm9kLTdmNWNjYzY3Yi1xczl4YgAAAAAAAgIzAgIzAAAAAAAAAAAAAAIUaVBob25lMTIsMQISaVBob25lIDExAAAAAAACMkV0c3lMaXN0aW5nVmlld0NvbnRyb2xsZXIAAAIIbnVsbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCHRydWUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgZVU0QAAAAAAAAAAAAAAAAAAAAAAAAAAAIIV2lmaQAAAAAAAAACEHBvcnRyYWl0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCmVuLVVTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgRVUwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgoyMjUyOQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAh5BbWVyaWNhL0NoaWNhZ28AAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + val payloadBytes = java.util.Base64.getDecoder.decode(beaconTopPayload) + + val (encoder, deserSchema) = AvroDeserializationSupport.build("test-topic", beaconTopSchema) + deserSchema.open(new DummyInitializationContext) + val sparkRow = deserSchema.deserialize(payloadBytes) + + val 
selects = Map( + "listing_id" -> "EXPLODE(SPLIT(COALESCE(properties['sold_listing_ids'], properties['listing_id']), ','))" + ).toSeq + val wheres = Seq( + s"event_name in ('backend_cart_payment', 'backend_add_to_cart')" + ) + val chrononSchema = + AvroConversions.toChrononSchema(new Schema.Parser().parse(beaconTopSchema)).asInstanceOf[ChrononStructType] + val eventExprEncoder = encoder.asInstanceOf[ExpressionEncoder[Row]] + val rowSerializer = eventExprEncoder.createSerializer() + val cu = new CatalystUtil(chrononSchema, selects, wheres) + val catalystInternalRow = rowSerializer(sparkRow) + val result = cu.performSql(catalystInternalRow) + assert(result.isEmpty) // no rows should be returned as the event is not in the where clause + } } object AvroObjectCreator { diff --git a/online/src/main/scala/ai/chronon/online/CatalystUtil.scala b/online/src/main/scala/ai/chronon/online/CatalystUtil.scala index 076d0d1c29..d2740f2ddd 100644 --- a/online/src/main/scala/ai/chronon/online/CatalystUtil.scala +++ b/online/src/main/scala/ai/chronon/online/CatalystUtil.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.execution.LocalTableScanExec import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.execution.RDDScanExec import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types import org.slf4j.LoggerFactory @@ -54,6 +55,9 @@ object CatalystUtil { private val elemArr: mutable.Queue[T] = mutable.Queue.empty[T] } + // Max fields supported for codegen. If this is exceeded, we fail at creation time to avoid buggy codegen + val MaxFields = 1000 + lazy val session: SparkSession = { val spark = SparkSession .builder() @@ -68,6 +72,7 @@ object CatalystUtil { // for derivations we only need to read one row at a time. // for interactive we set the limit to 16. 
.config("spark.sql.parquet.columnarReaderBatchSize", "16") + .config("spark.sql.codegen.maxFields", MaxFields) .enableHiveSupport() // needed to support registering Hive UDFs via CREATE FUNCTION.. calls .getOrCreate() assert(spark.sessionState.conf.wholeStageEnabled) @@ -135,7 +140,8 @@ class CatalystUtil(inputSchema: StructType, private val whereClauseOpt = Option(wheres) .filter(_.nonEmpty) .map { w => - s"${w.mkString(" AND ")}" + // wrap each clause in parens so it is evaluated as a unit when the clauses are joined with AND + w.map(c => s"( $c )").mkString(" AND ") } @transient lazy val inputSparkSchema: types.StructType = SparkConversions.fromChrononSchema(inputSchema) @@ -197,6 +203,12 @@ class CatalystUtil(inputSchema: StructType, // extract transform function from the df spark plan val func: InternalRow => ArrayBuffer[InternalRow] = filteredDf.queryExecution.executedPlan match { case whc: WholeStageCodegenExec => { + // if we have too many fields, whole-stage codegen will produce incorrect code, so we fail early + require( + !WholeStageCodegenExec.isTooManyFields(SQLConf.get, inputSparkSchema), + s"Too many fields in input schema. We support a max of: ${CatalystUtil.MaxFields}. 
Schema: ${inputSparkSchema.simpleString}" + ) + val (ctx, cleanedSource) = whc.doCodeGen() val (clazz, _) = CodeGenerator.compile(cleanedSource) val references = ctx.references.toArray diff --git a/online/src/test/scala/ai/chronon/online/test/CatalystUtilTest.scala b/online/src/test/scala/ai/chronon/online/test/CatalystUtilTest.scala index 9b72b20268..1be2c4b0b2 100644 --- a/online/src/test/scala/ai/chronon/online/test/CatalystUtilTest.scala +++ b/online/src/test/scala/ai/chronon/online/test/CatalystUtilTest.scala @@ -626,7 +626,7 @@ class CatalystUtilTest extends AnyFlatSpec with CatalystUtilTestSparkSQLStructs "json_prediction" -> "{ \"score\": 0.5}" ) - def testWhereClauseShouldFilterEventOut(): Unit = { + it should "test where clause filter events out" in { val selects = Map( "id" -> "key", "created" -> "created_ts", @@ -638,7 +638,7 @@ class CatalystUtilTest extends AnyFlatSpec with CatalystUtilTestSparkSQLStructs assertTrue(res.isEmpty) } - def testJsonInSelectAndValidWhereClause(): Unit = { + it should "test json in select and valid where clause" in { val selects = Map( "id" -> "key", "created" -> "created_ts",