diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/AllSupportedFieldsTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/AllSupportedFieldsTestCase.java index a0c222cc09af2..8e63e996a90d8 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/AllSupportedFieldsTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/AllSupportedFieldsTestCase.java @@ -63,11 +63,17 @@ import static org.hamcrest.Matchers.nullValue; /** - * Creates indices with all supported fields and fetches values from them to - * confirm that release builds correctly handle data types, even if they were - * introduced in later versions. + * Queries like {@code FROM * | KEEP *} can include columns of unsupported types, + * and we can run into serialization and correctness issues in mixed version clusters/CCS + * when support for a type is added in a later version. *
- * Entirely skipped in snapshot builds; data types that are under + * This creates indices with all index-able fields and fetches values from them to + * confirm that we correctly handle data types, even if they were introduced in later versions. + * Generally, this means that a type is treated as unsupported if any older node is involved. + * See {@link org.elasticsearch.xpack.esql.session.Versioned} for more details on how ESQL + * handles planning for mixed version clusters. + *
+ * This suite is entirely skipped in snapshot builds; data types that are under * construction are normally tested well enough in spec tests, skipping * old versions via {@link org.elasticsearch.xpack.esql.action.EsqlCapabilities}. *
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 84f7b1f897089..5dcb1f0fa74c2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -44,7 +44,22 @@ * processing pipeline, and types which the language doesn't support, but require * special handling anyway (e.g. {@link DataType#OBJECT}) * + *
+ * When such a type gets support in ES|QL, query plans cannot contain it + * unless all nodes in the cluster (and all remote clusters participating in the query) + * support it; otherwise, we risk serialization errors and semantically invalid results. + * This is an example of version-aware query planning; + * see {@link org.elasticsearch.xpack.esql.session.Versioned}. + * *
+ * In particular, this is where we perform remote calls to pre-analyze the query, + * that is, to resolve indices, enrich policies and their mappings. + *
+ * Note that this is not a session in the traditional sense of a stateful connection. This will + * produce a single result set that is either returned to the user directly or stored for + * later retrieval if the query was async. + */ public class EsqlSession { private static final Logger LOGGER = LogManager.getLogger(EsqlSession.class); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Versioned.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Versioned.java index 8d1121993f21b..1c700d7eddd85 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Versioned.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/Versioned.java @@ -10,8 +10,69 @@ import org.elasticsearch.TransportVersion; /** - * A wrapper for objects to pass along the minimum transport version available in the cluster (and remotes), - * esp. when dealing with listeners. Where this object gets consumed, we generally need to assume that all nodes in the cluster - * (and remotes) are at the minimum transport version, so that we don't use features not supported everywhere. + * ESQL sometimes can only create query plans in a certain way if all nodes in the cluster (and participating remotes) + * are at a certain minimum transport version. This is a wrapper to carry the minimum version along with objects that + * get created during planning. Where this object gets consumed, we need to assume that all nodes are at the minimum + * transport version, so that we don't use features not supported everywhere. + * + *
+ * The minimum version is available in the analyzer and optimizers and can be used to make query plans that only + * work if all nodes are at or above a certain version. This is not required for new language features (commands, + * functions, etc.), as it's fine to fail on the transport layer when a user uses a feature that is not supported on + * all nodes. Examples where this is required include: + *
+ * Old nodes don't support this new {@code TopNAggregate} class, but they can already execute this query. Therefore, we + * need to make sure that this optimization is only applied when all nodes are at or above the version that introduced + * {@code TopNAggregate}, or we will break backward compatibility. + *
+ * To achieve this, the optimizer rule can use the minimum transport version from the + * {@link org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext}. If the minimum version is at or above the required version, + * the optimization can be applied; otherwise, it must be skipped. + *
+ * The minimum version is available throughout the planning process; it can also be used in the analyzer + * ({@link org.elasticsearch.xpack.esql.analysis.AnalyzerContext}), or during physical planning and optimization + * ({@link org.elasticsearch.xpack.esql.planner.mapper.Mapper}, {@link org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerContext}). + * + *
+ * Let's say we introduce an optimization in 9.5.0 that requires a different query plan that older nodes + * don't support. Backporting this to 9.4.9 is likely fine, as the minimum transport version of a mixed 9.4.x/9.5+ setup + * (cluster or CCS) will be 9.4.x, so this optimization will be (correctly) disabled whenever pre-9.4.9 nodes are involved. + *
+ * However, if we also backport to 9.3.7, we have a problem: A 9.3.7/9.4.x/9.5+ setup will have a minimum transport version + * of 9.3.7, so the optimization will be enabled even if the 9.4.x nodes don't support the new plan because they're not patched + * to 9.4.9 yet. + *
+ * Rolling upgrades from 9.3.7 to 9.4.8 would have the same problem. That's generally ok, as rolling upgrades should be performed
+ * to the latest patch version of a given minor version; however, if 9.3.7 gets released before 9.4.9, users may not be
+ * able to perform rolling upgrades safely at all.
+ *
*/
public record Versioned