diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2e325678fef..66519ea302f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -8,9 +8,12 @@ - [ ] New functionality includes testing. - [ ] All tests pass, including unit test, integration test and doctest - [ ] New functionality has been documented. - - [ ] New functionality has javadoc added - - [ ] New functionality has user manual doc added -- [ ] Commits are signed per the DCO using --signoff + - [ ] New functionality has javadoc added. + - [ ] New functionality has a user manual doc added. +- [ ] New PPL command [checklist](https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md) all confirmed. +- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [ ] Commits are signed per the DCO using `--signoff` or `-s`. +- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). \ No newline at end of file diff --git a/.gitignore b/.gitignore index b9775dea046..cfe14869fa1 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,6 @@ http-client.env.json /doctest/sql-cli/ /doctest/opensearch-job-scheduler/ .factorypath + +.clinerules/ +.claude/ \ No newline at end of file diff --git a/DEVELOPER_GUIDE.rst b/DEVELOPER_GUIDE.rst index c0b028d4226..353627996c6 100644 --- a/DEVELOPER_GUIDE.rst +++ b/DEVELOPER_GUIDE.rst @@ -90,20 +90,14 @@ Firstly you need to add the following configuration to the JVM used by your IDE. 
License Header -------------- -Because our code is licensed under Apache 2, you need to add the following license header to all new source code files. To automate this whenever creating new file, you can follow instructions for your IDE:: - - /* - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +Because our code is licensed under Apache 2, you need to add the following license header to all new source code files. To automate this whenever creating new file, you can follow instructions for your IDE. + +.. code:: java + + /* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ For example, `here are the instructions for adding copyright profiles in IntelliJ IDEA `__. @@ -211,6 +205,14 @@ Java files are formatted using `Spotless ` * - Javadoc format can be maintained by wrapping javadoc with `
` HTML tags
    * - Strings can be formatted on multiple lines with a `+` with the correct indentation for the string.
 
+Development Guidelines
+----------------------
+
+For detailed development documentation, please refer to the `development documentation <https://github.com/opensearch-project/sql/tree/main/docs/dev>`_. For specific guidance on implementing PPL components, see the following resources:
+
+- `PPL Commands <https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-commands.md>`_: Guidelines for adding new commands to PPL
+- `PPL Functions <https://github.com/opensearch-project/sql/blob/main/docs/dev/ppl-functions.md>`_: Instructions for implementing and integrating custom functions
+
 Building and Running Tests
 ==========================
 
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
index bab83ce88b2..463706d52c3 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
@@ -77,7 +77,6 @@
 import org.apache.calcite.runtime.Hook;
 import org.apache.calcite.schema.SchemaPlus;
 import org.apache.calcite.server.CalciteServerStatement;
-import org.apache.calcite.sql.SqlAggFunction;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql2rel.SqlRexConvertletTable;
@@ -90,7 +89,7 @@
 import org.opensearch.sql.calcite.CalcitePlanContext;
 import org.opensearch.sql.calcite.plan.OpenSearchRules;
 import org.opensearch.sql.calcite.plan.Scannable;
-import org.opensearch.sql.calcite.udf.udaf.NullableSqlAvgAggFunction;
+import org.opensearch.sql.expression.function.PPLBuiltinOperators;
 
 /**
  * Calcite Tools Helper. This class is used to create customized: 1. Connection 2. JavaTypeFactory
@@ -186,7 +185,7 @@ public OpenSearchRelBuilder(Context context, RelOptCluster cluster, RelOptSchema
     public AggCall avg(boolean distinct, String alias, RexNode operand) {
       return aggregateCall(
           SqlParserPos.ZERO,
-          AVG_NULLABLE,
+          PPLBuiltinOperators.AVG_NULLABLE,
           distinct,
           false,
           false,
@@ -199,16 +198,6 @@ public AggCall avg(boolean distinct, String alias, RexNode operand) {
     }
   }
 
-  public static final SqlAggFunction AVG_NULLABLE = new NullableSqlAvgAggFunction(SqlKind.AVG);
-  public static final SqlAggFunction STDDEV_POP_NULLABLE =
-      new NullableSqlAvgAggFunction(SqlKind.STDDEV_POP);
-  public static final SqlAggFunction STDDEV_SAMP_NULLABLE =
-      new NullableSqlAvgAggFunction(SqlKind.STDDEV_SAMP);
-  public static final SqlAggFunction VAR_POP_NULLABLE =
-      new NullableSqlAvgAggFunction(SqlKind.VAR_POP);
-  public static final SqlAggFunction VAR_SAMP_NULLABLE =
-      new NullableSqlAvgAggFunction(SqlKind.VAR_SAMP);
-
   public static class OpenSearchPrepareImpl extends CalcitePrepareImpl {
     /**
      * Similar to {@link CalcitePrepareImpl#perform(CalciteServerStatement, FrameworkConfig,
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java
index d7bcd3526e7..e4b58c38662 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java
@@ -39,6 +39,10 @@ private PPLOperandTypes() {}
               OperandTypes.NUMERIC.or(
                   OperandTypes.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.CHARACTER)));
 
+  public static final UDFOperandMetadata ANY_OPTIONAL_INTEGER =
+      UDFOperandMetadata.wrap(
+          (CompositeOperandTypeChecker)
+              OperandTypes.ANY.or(OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER)));
   public static final UDFOperandMetadata INTEGER_INTEGER =
       UDFOperandMetadata.wrap((FamilyOperandTypeChecker) OperandTypes.INTEGER_INTEGER);
   public static final UDFOperandMetadata STRING_STRING =
@@ -48,6 +52,12 @@ private PPLOperandTypes() {}
   public static final UDFOperandMetadata STRING_INTEGER =
       UDFOperandMetadata.wrap(OperandTypes.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER));
 
+  public static final UDFOperandMetadata NUMERIC_NUMERIC_OPTIONAL_NUMERIC =
+      UDFOperandMetadata.wrap(
+          (CompositeOperandTypeChecker)
+              OperandTypes.NUMERIC_NUMERIC.or(
+                  OperandTypes.family(
+                      SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)));
   public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC =
       UDFOperandMetadata.wrap(
           OperandTypes.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC));
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLReturnTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLReturnTypes.java
index c3b3f0f4f8f..bb0ea0831c5 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLReturnTypes.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLReturnTypes.java
@@ -5,10 +5,13 @@
 
 package org.opensearch.sql.calcite.utils;
 
+import java.util.List;
 import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.type.SqlTypeTransforms;
+import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.opensearch.sql.data.type.ExprCoreType;
 
 /**
@@ -39,4 +42,17 @@ private PPLReturnTypes() {}
         }
         return UserDefinedFunctionUtils.NULLABLE_TIMESTAMP_UDT;
       };
+  /** Infers a nullable ARRAY type whose component type is the type of the first operand. */
+  public static final SqlReturnTypeInference ARG0_ARRAY =
+      opBinding -> {
+        RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
+        List<RelDataType> argTypes = opBinding.collectOperandTypes();
+
+        // An array type cannot be derived without an element type, so reject zero-operand calls.
+        if (argTypes.isEmpty()) {
+          throw new IllegalArgumentException("Function requires at least one argument.");
+        }
+        RelDataType firstArgType = argTypes.get(0);
+        return SqlTypeUtil.createArrayType(typeFactory, firstArgType, true);
+      };
 }
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java
index 8c3be710538..48741a15e1a 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java
@@ -17,6 +17,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 import java.util.TimeZone;
 import java.util.stream.Collectors;
@@ -28,7 +29,6 @@
 import org.apache.calcite.linq4j.tree.Expression;
 import org.apache.calcite.linq4j.tree.Expressions;
 import org.apache.calcite.rel.type.RelDataType;
-import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.schema.impl.AggregateFunctionImpl;
@@ -94,14 +94,15 @@ public class UserDefinedFunctionUtils {
   public static SqlUserDefinedAggFunction createUserDefinedAggFunction(
       Class> udafClass,
       String functionName,
-      SqlReturnTypeInference returnType) {
+      SqlReturnTypeInference returnType,
+      @Nullable UDFOperandMetadata operandMetadata) {
     return new SqlUserDefinedAggFunction(
         new SqlIdentifier(functionName, SqlParserPos.ZERO),
         SqlKind.OTHER_FUNCTION,
         returnType,
         null,
-        null,
-        AggregateFunctionImpl.create(udafClass),
+        operandMetadata,
+        Objects.requireNonNull(AggregateFunctionImpl.create(udafClass)),
         false,
         false,
         Optionality.FORBIDDEN);
@@ -126,45 +127,6 @@ public static RelBuilder.AggCall makeAggregateCall(
     return relBuilder.aggregateCall(aggFunction, addArgList);
   }
 
-  /**
-   * Creates and registers a User Defined Aggregate Function (UDAF) and returns an AggCall that can
-   * be used in query plans.
-   *
-   * @param udafClass The class implementing the aggregate function behavior
-   * @param functionName The name of the aggregate function
-   * @param returnType The return type inference for determining the result type
-   * @param fields The primary fields to aggregate
-   * @param argList Additional arguments for the aggregate function
-   * @param relBuilder The RelBuilder instance used for building relational expressions
-   * @return An AggCall object representing the aggregate function call
-   */
-  public static RelBuilder.AggCall createAggregateFunction(
-      Class> udafClass,
-      String functionName,
-      SqlReturnTypeInference returnType,
-      List fields,
-      List argList,
-      RelBuilder relBuilder) {
-    SqlUserDefinedAggFunction udaf =
-        createUserDefinedAggFunction(udafClass, functionName, returnType);
-    return makeAggregateCall(udaf, fields, argList, relBuilder);
-  }
-
-  public static SqlReturnTypeInference getReturnTypeInferenceForArray() {
-    return opBinding -> {
-      RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
-
-      // Get argument types
-      List argTypes = opBinding.collectOperandTypes();
-
-      if (argTypes.isEmpty()) {
-        throw new IllegalArgumentException("Function requires at least one argument.");
-      }
-      RelDataType firstArgType = argTypes.get(0);
-      return createArrayType(typeFactory, firstArgType, true);
-    };
-  }
-
   public static SqlTypeName convertRelDataTypeToSqlTypeName(RelDataType type) {
     if (type instanceof AbstractExprRelDataType) {
       AbstractExprRelDataType exprType = (AbstractExprRelDataType) type;
@@ -276,6 +238,38 @@ public UDFOperandMetadata getOperandMetadata() {
     };
   }
 
+  /**
+   * Adapts a method from the v2 implementation whose parameters include a {@link
+   * FunctionProperties} at the beginning to a Calcite-compatible UserDefinedFunctionBuilder.
+   */
+  public static ImplementorUDF adaptExprMethodWithPropertiesToUDF(
+      java.lang.reflect.Type type,
+      String methodName,
+      SqlReturnTypeInference returnTypeInference,
+      NullPolicy nullPolicy,
+      UDFOperandMetadata operandMetadata) {
+    NotNullImplementor implementor =
+        (translator, call, translatedOperands) -> {
+          List operands =
+              convertToExprValues(
+                  translatedOperands, call.getOperands().stream().map(RexNode::getType).collect(Collectors.toList()));
+          List operandsWithProperties = prependFunctionProperties(operands, translator);
+          Expression exprResult = Expressions.call(type, methodName, operandsWithProperties);
+          return Expressions.call(exprResult, "valueForCalcite");
+        };
+    return new ImplementorUDF(implementor, nullPolicy) {
+      @Override
+      public SqlReturnTypeInference getReturnTypeInference() {
+        return returnTypeInference;
+      }
+
+      @Override
+      public UDFOperandMetadata getOperandMetadata() {
+        return operandMetadata;
+      }
+    };
+  }
+
   /**
    * Adapt a static math function (e.g., Math.expm1, Math.rint) to a UserDefinedFunctionBuilder.
    * This method generates a Calcite-compatible UDF by boxing the operand, converting it to a
@@ -326,32 +320,4 @@ public static List prependFunctionProperties(
     operandsWithProperties.add(0, properties);
     return Collections.unmodifiableList(operandsWithProperties);
   }
-
-  public static ImplementorUDF adaptExprMethodWithPropertiesToUDF(
-      java.lang.reflect.Type type,
-      String methodName,
-      SqlReturnTypeInference returnTypeInference,
-      NullPolicy nullPolicy,
-      UDFOperandMetadata operandMetadata) {
-    NotNullImplementor implementor =
-        (translator, call, translatedOperands) -> {
-          List operands =
-              convertToExprValues(
-                  translatedOperands, call.getOperands().stream().map(RexNode::getType).collect(Collectors.toList()));
-          List operandsWithProperties = prependFunctionProperties(operands, translator);
-          Expression exprResult = Expressions.call(type, methodName, operandsWithProperties);
-          return Expressions.call(exprResult, "valueForCalcite");
-        };
-    return new ImplementorUDF(implementor, nullPolicy) {
-      @Override
-      public SqlReturnTypeInference getReturnTypeInference() {
-        return returnTypeInference;
-      }
-
-      @Override
-      public UDFOperandMetadata getOperandMetadata() {
-        return operandMetadata;
-      }
-    };
-  }
 }
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
index c78c921330e..a4acf68ffef 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
@@ -8,6 +8,7 @@
 import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.adaptExprMethodToUDF;
 import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.adaptExprMethodWithPropertiesToUDF;
 import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.adaptMathFunctionToUDF;
+import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.createUserDefinedAggFunction;
 
 import com.google.common.base.Suppliers;
 import java.lang.reflect.InvocationTargetException;
@@ -21,11 +22,17 @@
 import org.apache.calcite.avatica.util.TimeUnit;
 import org.apache.calcite.linq4j.tree.Expression;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlTypeTransforms;
 import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
 import org.apache.calcite.util.BuiltInMethod;
+import org.opensearch.sql.calcite.udf.udaf.LogPatternAggFunction;
+import org.opensearch.sql.calcite.udf.udaf.NullableSqlAvgAggFunction;
+import org.opensearch.sql.calcite.udf.udaf.PercentileApproxFunction;
+import org.opensearch.sql.calcite.udf.udaf.TakeAggFunction;
 import org.opensearch.sql.calcite.utils.PPLOperandTypes;
 import org.opensearch.sql.calcite.utils.PPLReturnTypes;
 import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
@@ -397,6 +404,35 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
   public static final SqlOperator ENHANCED_COALESCE =
       new EnhancedCoalesceFunction().toUDF("COALESCE");
 
+  // Aggregation functions
+  public static final SqlAggFunction AVG_NULLABLE = new NullableSqlAvgAggFunction(SqlKind.AVG);
+  public static final SqlAggFunction STDDEV_POP_NULLABLE =
+      new NullableSqlAvgAggFunction(SqlKind.STDDEV_POP);
+  public static final SqlAggFunction STDDEV_SAMP_NULLABLE =
+      new NullableSqlAvgAggFunction(SqlKind.STDDEV_SAMP);
+  public static final SqlAggFunction VAR_POP_NULLABLE =
+      new NullableSqlAvgAggFunction(SqlKind.VAR_POP);
+  public static final SqlAggFunction VAR_SAMP_NULLABLE =
+      new NullableSqlAvgAggFunction(SqlKind.VAR_SAMP);
+  public static final SqlAggFunction TAKE =
+      createUserDefinedAggFunction(
+          TakeAggFunction.class,
+          "TAKE",
+          PPLReturnTypes.ARG0_ARRAY,
+          PPLOperandTypes.ANY_OPTIONAL_INTEGER);
+  public static final SqlAggFunction PERCENTILE_APPROX =
+      createUserDefinedAggFunction(
+          PercentileApproxFunction.class,
+          "percentile_approx",
+          ReturnTypes.ARG0_FORCE_NULLABLE,
+          PPLOperandTypes.NUMERIC_NUMERIC_OPTIONAL_NUMERIC);
+  public static final SqlAggFunction INTERNAL_PATTERN =
+      createUserDefinedAggFunction(
+          LogPatternAggFunction.class,
+          "pattern",
+          ReturnTypes.explicit(UserDefinedFunctionUtils.nullablePatternAggList),
+          null);
+
   /**
    * Returns the PPL specific operator table, creating it if necessary.
    *
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
index cec61a28184..51281df7c56 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
@@ -6,13 +6,8 @@
 package org.opensearch.sql.expression.function;
 
 import static org.apache.calcite.sql.SqlJsonConstructorNullClause.NULL_ON_NULL;
-import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.STDDEV_POP_NULLABLE;
-import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.STDDEV_SAMP_NULLABLE;
-import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.VAR_POP_NULLABLE;
-import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.VAR_SAMP_NULLABLE;
 import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
 import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.getLegacyTypeName;
-import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.createAggregateFunction;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.ABS;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.ACOS;
 import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADD;
@@ -249,7 +244,6 @@
 import org.apache.calcite.sql.type.CompositeOperandTypeChecker;
 import org.apache.calcite.sql.type.ImplicitCastOperandTypeChecker;
 import org.apache.calcite.sql.type.OperandTypes;
-import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SameOperandTypeChecker;
 import org.apache.calcite.sql.type.SqlOperandTypeChecker;
 import org.apache.calcite.sql.type.SqlTypeFamily;
@@ -261,9 +255,6 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.opensearch.sql.calcite.CalcitePlanContext;
-import org.opensearch.sql.calcite.udf.udaf.LogPatternAggFunction;
-import org.opensearch.sql.calcite.udf.udaf.PercentileApproxFunction;
-import org.opensearch.sql.calcite.udf.udaf.TakeAggFunction;
 import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
 import org.opensearch.sql.calcite.utils.PlanUtils;
 import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
@@ -598,7 +589,6 @@ void registerOperator(BuiltinFunctionName functionName, SqlOperator... operators
         } else {
           typeChecker = operator.getOperandTypeChecker();
         }
-
         PPLTypeChecker pplTypeChecker =
             wrapSqlOperandTypeChecker(
                 typeChecker, operator.getName(), operator instanceof SqlUserDefinedFunction);
@@ -1023,13 +1013,6 @@ void populate() {
                       builder.makeLiteral("\\")),
           PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING));
     }
-
-    private static SqlOperandTypeChecker extractTypeCheckerFromUDF(
-        SqlUserDefinedFunction udfOperator) {
-      UDFOperandMetadata udfOperandMetadata =
-          (UDFOperandMetadata) udfOperator.getOperandTypeChecker();
-      return (udfOperandMetadata == null) ? null : udfOperandMetadata.getInnerTypeChecker();
-    }
   }
 
   private static class Builder extends AbstractBuilder {
@@ -1061,12 +1044,16 @@ void register(
     }
 
     void registerOperator(BuiltinFunctionName functionName, SqlAggFunction aggFunction) {
+      SqlOperandTypeChecker innerTypeChecker = extractTypeCheckerFromUDF(aggFunction);
       PPLTypeChecker typeChecker =
-          wrapSqlOperandTypeChecker(aggFunction.getOperandTypeChecker(), functionName.name(), true);
+          wrapSqlOperandTypeChecker(innerTypeChecker, functionName.name(), true);
       AggHandler handler =
-          (distinct, field, argList, ctx) ->
-              UserDefinedFunctionUtils.makeAggregateCall(
-                  aggFunction, List.of(field), argList, ctx.relBuilder);
+          (distinct, field, argList, ctx) -> {
+            List newArgList =
+                argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList());
+            return UserDefinedFunctionUtils.makeAggregateCall(
+                aggFunction, List.of(field), newArgList, ctx.relBuilder);
+          };
       register(functionName, handler, typeChecker);
     }
 
@@ -1074,6 +1061,12 @@ void populate() {
       registerOperator(MAX, SqlStdOperatorTable.MAX);
       registerOperator(MIN, SqlStdOperatorTable.MIN);
       registerOperator(SUM, SqlStdOperatorTable.SUM);
+      registerOperator(VARSAMP, PPLBuiltinOperators.VAR_SAMP_NULLABLE);
+      registerOperator(VARPOP, PPLBuiltinOperators.VAR_POP_NULLABLE);
+      registerOperator(STDDEV_SAMP, PPLBuiltinOperators.STDDEV_SAMP_NULLABLE);
+      registerOperator(STDDEV_POP, PPLBuiltinOperators.STDDEV_POP_NULLABLE);
+      registerOperator(TAKE, PPLBuiltinOperators.TAKE);
+      registerOperator(INTERNAL_PATTERN, PPLBuiltinOperators.INTERNAL_PATTERN);
 
       register(
           AVG,
@@ -1095,86 +1088,23 @@ void populate() {
           wrapSqlOperandTypeChecker(
               SqlStdOperatorTable.COUNT.getOperandTypeChecker(), COUNT.name(), false));
 
-      register(
-          VARSAMP,
-          (distinct, field, argList, ctx) -> ctx.relBuilder.aggregateCall(VAR_SAMP_NULLABLE, field),
-          wrapSqlOperandTypeChecker(
-              SqlStdOperatorTable.VAR_SAMP.getOperandTypeChecker(), VARSAMP.name(), false));
-
-      register(
-          VARPOP,
-          (distinct, field, argList, ctx) -> ctx.relBuilder.aggregateCall(VAR_POP_NULLABLE, field),
-          wrapSqlOperandTypeChecker(
-              SqlStdOperatorTable.VAR_POP.getOperandTypeChecker(), VARPOP.name(), false));
-
-      register(
-          STDDEV_SAMP,
-          (distinct, field, argList, ctx) ->
-              ctx.relBuilder.aggregateCall(STDDEV_SAMP_NULLABLE, field),
-          wrapSqlOperandTypeChecker(
-              SqlStdOperatorTable.STDDEV_SAMP.getOperandTypeChecker(), STDDEV_SAMP.name(), false));
-
-      register(
-          STDDEV_POP,
-          (distinct, field, argList, ctx) ->
-              ctx.relBuilder.aggregateCall(STDDEV_POP_NULLABLE, field),
-          wrapSqlOperandTypeChecker(
-              SqlStdOperatorTable.STDDEV_POP.getOperandTypeChecker(), STDDEV_POP.name(), false));
-
-      register(
-          TAKE,
-          (distinct, field, argList, ctx) -> {
-            List newArgList =
-                argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList());
-            return createAggregateFunction(
-                TakeAggFunction.class,
-                "TAKE",
-                UserDefinedFunctionUtils.getReturnTypeInferenceForArray(),
-                List.of(field),
-                newArgList,
-                ctx.relBuilder);
-          },
-          PPLTypeChecker.wrapComposite(
-              (CompositeOperandTypeChecker)
-                  OperandTypes.ANY.or(
-                      OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER)),
-              false));
-
       register(
           PERCENTILE_APPROX,
           (distinct, field, argList, ctx) -> {
             List newArgList =
                 argList.stream().map(PlanUtils::derefMapCall).collect(Collectors.toList());
             newArgList.add(ctx.rexBuilder.makeFlag(field.getType().getSqlTypeName()));
-            return createAggregateFunction(
-                PercentileApproxFunction.class,
-                "percentile_approx",
-                ReturnTypes.ARG0_FORCE_NULLABLE,
-                List.of(field),
-                newArgList,
-                ctx.relBuilder);
+            return UserDefinedFunctionUtils.makeAggregateCall(
+                PPLBuiltinOperators.PERCENTILE_APPROX, List.of(field), newArgList, ctx.relBuilder);
           },
-          PPLTypeChecker.wrapComposite(
-              (CompositeOperandTypeChecker)
-                  OperandTypes.NUMERIC_NUMERIC.or(
-                      OperandTypes.family(
-                          SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)),
+          wrapSqlOperandTypeChecker(
+              extractTypeCheckerFromUDF(PPLBuiltinOperators.PERCENTILE_APPROX),
+              PERCENTILE_APPROX.name(),
               false));
-
-      register(
-          INTERNAL_PATTERN,
-          (distinct, field, argList, ctx) ->
-              createAggregateFunction(
-                  LogPatternAggFunction.class,
-                  "pattern",
-                  ReturnTypes.explicit(UserDefinedFunctionUtils.nullablePatternAggList),
-                  List.of(field),
-                  argList,
-                  ctx.relBuilder),
-          null);
     }
   }
 
+
   /**
    * Get a string representation of the argument types expressed in ExprType for error messages.
    *
@@ -1190,59 +1120,84 @@ private static String getActualSignature(List argTypes) {
         + "]";
   }
 
-    /**
-     * Wraps a {@link SqlOperandTypeChecker} into a {@link PPLTypeChecker} for use in function
-     * signature validation.
-     *
-     * @param typeChecker the original SQL operand type checker
-     * @param functionName the name of the function for error reporting
-     * @param isUserDefinedFunction true if the function is user-defined, false otherwise
-     * @return a {@link PPLTypeChecker} that delegates to the provided {@code typeChecker}
-     */
-    private static PPLTypeChecker wrapSqlOperandTypeChecker(
-            SqlOperandTypeChecker typeChecker, String functionName, boolean isUserDefinedFunction) {
-        PPLTypeChecker pplTypeChecker;
-        // Only the composite operand type checker for UDFs are concerned here.
-        if (isUserDefinedFunction
-                && typeChecker instanceof CompositeOperandTypeChecker) {
-            // UDFs implement their own composite type checkers, which always use OR logic for
-            // argument types. Verifying the composition type would require accessing a protected field in
-            // CompositeOperandTypeChecker. If access to this field is not allowed, type checking will
-            // be skipped, so we avoid checking the composition type here.
-            CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
-            pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, false);
-        } else if (typeChecker instanceof ImplicitCastOperandTypeChecker) {
-            ImplicitCastOperandTypeChecker implicitCastTypeChecker = (ImplicitCastOperandTypeChecker) typeChecker;
-            pplTypeChecker = PPLTypeChecker.wrapFamily(implicitCastTypeChecker);
-        } else if (typeChecker instanceof CompositeOperandTypeChecker) {
-            // If compositeTypeChecker contains operand checkers other than family type checkers or
-            // other than OR compositions, the function with be registered with a null type checker,
-            // which means the function will not be type checked.
-            CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
-            try {
-                pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, true);
-            } catch (IllegalArgumentException | UnsupportedOperationException e) {
-                logger.debug(
-                        String.format(
-                                "Failed to create composite type checker for operator: %s. Will skip its type"
-                                        + " checking",
-                                functionName),
-                        e);
-                pplTypeChecker = null;
-            }
-        } else if (typeChecker instanceof SameOperandTypeChecker) {
-            // Comparison operators like EQUAL, GREATER_THAN, LESS_THAN, etc.
-            // SameOperandTypeCheckers like COALESCE, IFNULL, etc.
-            SameOperandTypeChecker comparableTypeChecker = (SameOperandTypeChecker) typeChecker;
-            pplTypeChecker = PPLTypeChecker.wrapComparable(comparableTypeChecker);
-        } else if (typeChecker instanceof UDFOperandMetadata.UDTOperandMetadata) {
-            UDFOperandMetadata.UDTOperandMetadata udtOperandMetadata = (UDFOperandMetadata.UDTOperandMetadata) typeChecker;
-            pplTypeChecker = PPLTypeChecker.wrapUDT(udtOperandMetadata.getAllowSignatures());
-        } else {
-            logger.info(
-                    "Cannot create type checker for function: {}. Will skip its type checking", functionName);
+  /**
+   * Wraps a {@link SqlOperandTypeChecker} into a {@link PPLTypeChecker} for use in function
+   * signature validation.
+   *
+   * @param typeChecker the original SQL operand type checker
+   * @param functionName the name of the function, used in log messages when wrapping fails
+   * @param isUserDefinedFunction true if the function is user-defined, false otherwise
+   * @return the wrapping {@link PPLTypeChecker}, or {@code null} if {@code typeChecker} is unsupported
+   */
+  private static PPLTypeChecker wrapSqlOperandTypeChecker(
+        SqlOperandTypeChecker typeChecker, String functionName, boolean isUserDefinedFunction) {
+    PPLTypeChecker pplTypeChecker;
+    // This first branch only handles composite operand type checkers that belong to UDFs.
+    if (isUserDefinedFunction
+            && typeChecker instanceof CompositeOperandTypeChecker) {
+        // UDFs implement their own composite type checkers, which always use OR logic for
+        // argument types. Verifying the composition type would require accessing a protected field in
+        // CompositeOperandTypeChecker. If access to this field is not allowed, type checking will
+        // be skipped, so we avoid checking the composition type here.
+        CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
+        pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, false);
+    } else if (typeChecker instanceof ImplicitCastOperandTypeChecker) {
+        ImplicitCastOperandTypeChecker implicitCastTypeChecker = (ImplicitCastOperandTypeChecker) typeChecker;
+        pplTypeChecker = PPLTypeChecker.wrapFamily(implicitCastTypeChecker);
+    } else if (typeChecker instanceof CompositeOperandTypeChecker) {
+        // If compositeTypeChecker contains operand checkers other than family type checkers or
+        // other than OR compositions, the function will be registered with a null type checker,
+        // which means the function will not be type checked.
+        CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
+        try {
+            pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, true);
+        } catch (IllegalArgumentException | UnsupportedOperationException e) {
+            logger.debug(
+                    String.format(
+                            "Failed to create composite type checker for operator: %s. Will skip its type"
+                                    + " checking",
+                            functionName),
+                    e);
             pplTypeChecker = null;
         }
-        return pplTypeChecker;
+    } else if (typeChecker instanceof SameOperandTypeChecker) {
+        // Comparison operators like EQUAL, GREATER_THAN, LESS_THAN, etc.
+        // SameOperandTypeCheckers like COALESCE, IFNULL, etc.
+        SameOperandTypeChecker comparableTypeChecker = (SameOperandTypeChecker) typeChecker;
+        pplTypeChecker = PPLTypeChecker.wrapComparable(comparableTypeChecker);
+    } else if (typeChecker instanceof UDFOperandMetadata.UDTOperandMetadata) {
+        UDFOperandMetadata.UDTOperandMetadata udtOperandMetadata = (UDFOperandMetadata.UDTOperandMetadata) typeChecker;
+        pplTypeChecker = PPLTypeChecker.wrapUDT(udtOperandMetadata.getAllowSignatures());
+    } else {
+        logger.info(
+                "Cannot create type checker for function: {}. Will skip its type checking", functionName);
+        pplTypeChecker = null;
     }
-}
+    return pplTypeChecker;
+  }
+
+
+  /**
+   * Extracts the underlying {@link SqlOperandTypeChecker} from a {@link SqlOperator}.
+   *
+   * 

For user-defined functions (UDFs) and user-defined aggregate functions (UDAFs), the {@link + * SqlOperandTypeChecker} is typically wrapped in a {@link UDFOperandMetadata}, which contains the + * actual type checker used for operand validation. Most of these wrapped type checkers are + * defined in {@link org.opensearch.sql.calcite.utils.PPLOperandTypes}. This method retrieves the + * inner type checker from {@link UDFOperandMetadata} if present. + * + *

For Calcite's built-in operators, its type checker is returned directly. + * + * @param operator the {@link SqlOperator}, which may be a Calcite built-in operator, a + * user-defined function, or a user-defined aggregation function + * @return the underlying {@link SqlOperandTypeChecker} instance, or {@code null} if not available + */ + private static SqlOperandTypeChecker extractTypeCheckerFromUDF(SqlOperator operator) { + SqlOperandTypeChecker typeChecker = operator.getOperandTypeChecker(); + if (typeChecker instanceof UDFOperandMetadata) { + UDFOperandMetadata udfOperandMetadata = (UDFOperandMetadata) typeChecker; + return udfOperandMetadata.getInnerTypeChecker(); + } + return typeChecker; + } +} \ No newline at end of file diff --git a/docs/dev/index.md b/docs/dev/index.md index 4b8745e2dfa..fa19a6484c6 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -38,6 +38,8 @@ + [Nested Function In Select Clause](sql-nested-function-select-clause.md): Nested function support in sql select clause + [Nested Function In Where Clause](sql-nested-function-where-clause.md): Nested function support in sql where clause + **Piped Processing Language** + + [PPL Command Checklist](ppl-commands.md): A checklist for developing a new PPL command + + [PPL Functions](ppl-functions.md): Guidance on developing a PPL function ### Query Processing diff --git a/docs/dev/ppl-commands.md b/docs/dev/ppl-commands.md new file mode 100644 index 00000000000..9d62e607f86 --- /dev/null +++ b/docs/dev/ppl-commands.md @@ -0,0 +1,57 @@ +# New PPL Command Checklist + +If you are contributing a new PPL command, please read this guide and confirm that all items in the checklist are done before requesting code review. You can also use this checklist as a guide for adding a new PPL command.
+ +## Prerequisite + +- [ ] **Open an RFC issue before starting to code:** + - Describe the purpose of the new command + - Include at least syntax definition, usage and examples + - Implementation options are welcome if you have multiple ways to implement it + +- [ ] **Obtain PM review approval for the RFC:** + - If PM unavailable, consult repository maintainers as alternative + - An offline meeting might be required to discuss the syntax and usage + +## Coding & Tests + +- [ ] **Lexer/Parser Updates:** + - Add new keywords to OpenSearchPPLLexer.g4 + - Add grammar rules to OpenSearchPPLParser.g4 + - Update `commandName` and `keywordsCanBeId` + +- [ ] **AST Implementation:** + - Add new tree nodes under package `org.opensearch.sql.ast.tree` + - Prefer reusing `Argument` for command arguments **over** creating new expression nodes under `org.opensearch.sql.ast.expression` + +- [ ] **Visitor Pattern:** + - Add `visit*` in `AbstractNodeVisitor` + - Overriding `visit*` in `Analyzer`, `CalciteRelNodeVisitor` and `PPLQueryDataAnonymizer` + +- [ ] **Unit Tests:** + - Extend `CalcitePPLAbstractTest` + - Keep test queries minimal + - Include `verifyLogical()` and `verifyPPLToSparkSQL()` + +- [ ] **Integration tests (pushdown):** + - Extend `PPLIntegTestCase` + - Use complex real-world queries + - Include `verifySchema()` and `verifyDataRows()` + +- [ ] **Integration tests (Non-pushdown):** + - Add test class to `CalciteNoPushdownIT` + +- [ ] **Explain tests:** + - Add tests to `ExplainIT` or `CalciteExplainIT` + +- [ ] **Unsupported in v2 test:** + - Add a test in `NewAddedCommandsIT` + +- [ ] **Anonymizer tests:** + - Add a test in `PPLQueryDataAnonymizerTest` + +- [ ] **Cross-cluster Tests (optional, nice to have):** + - Add a test in `CrossClusterSearchIT` + +- [ ] **User doc:** + - Add a xxx.rst under `docs/user/ppl/cmd` and link the new doc to `docs/user/ppl/index.rst` diff --git a/docs/dev/ppl-functions.md b/docs/dev/ppl-functions.md new file mode 100644 index 
00000000000..1d4085ebf62 --- /dev/null +++ b/docs/dev/ppl-functions.md @@ -0,0 +1,218 @@ +# Developing PPL Functions + +This guide explains how to develop and implement functions for PPL with Calcite. + +## Prerequisites + +- [ ] Create an issue describing the purpose and expected behavior of the function +- [ ] Ensure the function name is recognized by PPL syntax by checking ``OpenSearchPPLLexer.g4``, + ``OpenSearchPPLParser.g4``, and ``BuiltinFunctionName.java`` +- [ ] Plan the documentation of the function under ``docs/user/ppl/functions/`` directory + +## User-Defined Functions (UDFs) + +A user-defined function is an instance +of [SqlOperator](https://calcite.apache.org/javadocAggregate/org/apache/calcite/sql/SqlOperator.html) that transforms +input row expressions ([RexNode](https://calcite.apache.org/javadocAggregate/org/apache/calcite/rex/RexNode.html)) into +a new one. + +### Creating UDFs + +There are mainly three approaches to implementing UDFs: + +#### 1. Use existing Calcite operators + +Leverage operators already declared in +Calcite's [SqlStdOperatorTable](https://calcite.apache.org/javadocAggregate/org/apache/calcite/sql/fun/SqlStdOperatorTable.html) +or [SqlLibraryOperators](https://calcite.apache.org/javadocAggregate/org/apache/calcite/sql/fun/SqlLibraryOperators.html), +and defined +in [RexImpTable.java](https://calcite.apache.org/javadocAggregate/org/apache/calcite/adapter/enumerable/RexImpTable.html). +For example, `SqlStdOperatorTable.PLUS` is used as one of the implementations for `+` in PPL. + +This approach is useful when the function you need to implement already exists in Apache Calcite and you just need to +expose it through your PPL interface. + +#### 2. Adapt existing static methods + +Adapt Java static methods to UDFs using utility functions like `UserDefinedFunctionUtils.adapt*ToUDF`. 
+ +This approach allows you to leverage existing Java methods by wrapping them as UDFs, which can be more straightforward +than implementing from scratch. + +Among existing adaptation utilities, `adaptExprMethodToUDF` adapts a v2 function implementation into a UDF builder, while `adaptMathFunctionToUDF` adapts a static function from `java.lang.Math` to a +UDF builder. You can create your own adaptation utilities if you need to adapt other kinds of static methods to UDFs. + +Example: + +```java +SqlOperator SINH = adaptMathFunctionToUDF( + "sinh", ReturnTypes.DOUBLE_FORCE_NULLABLE, NullPolicy.ANY, PPLOperandTypes.NUMERIC) + .toUDF("SINH"); +``` + +#### 3. Implement from scratch + +For more complex functions or when you need complete control over the implementation: + +1. Implement the `ImplementorUDF` interface, which is a simplified interface for creating + a [SqlUserDefinedFunction](https://calcite.apache.org/javadocAggregate/org/apache/calcite/sql/validate/SqlUserDefinedFunction.html). +2. Instantiate and convert it to a `SqlOperator` in `PPLBuiltinOperators` +3. For optimal UDF performance, implement any data-independent logic during the compilation phase instead of at runtime. + Specifically, + use [linq4j expressions](https://calcite.apache.org/javadocAggregate/org/apache/calcite/linq4j/tree/Expression.html) + for these operations rather than internal static method calls, as expressions are evaluated during compilation. + +Example: + +```java +public class MyCustomUDF extends ImplementorUDF { + // Define operand types, return types, null policies, and constructors + // ... + + public static class Crc32Implementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + // Implementation details goes here ... 
+ } + } +} + +// Converting to SqlUserDefinedFunction (an extension of SqlOperator) +SqlOperator myOperator = new MyCustomUDF().toUDF("FUNC_NAME"); +``` + +### Type Checking for UDFs + +Type checking ensures that functions receive the correct argument types: + +- Each `SqlOperator` provides an operand type checker via its [ + `getOperandTypeChecker`](https://calcite.apache.org/javadocAggregate/org/apache/calcite/sql/SqlOperator.html#getOperandTypeChecker()) + method +- Calcite's built-in operators come with predefined type checkers of type [`SqlOperandTypeChecker`](https://calcite.apache.org/javadocAggregate/org/apache/calcite/sql/type/SqlOperandTypeChecker.html) +- For custom UDFs, the `UDFOperandMetadata` interface is used to feed function type information so that a + `SqlOperandTypeChecker` can be retrieved in the same way as Calcite's built-in operators. Most of the operand types + are defined in `PPLOperandTypes` as instances of `UDFOperandMetadata`. E.g. `PPLOperandTypes.NUMERIC_NUMERIC` +- Since `SqlOperandTypeChecker` works on parsed SQL trees (which aren't directly accessible in our architecture), the + `PPLTypeChecker` interface was created to perform actual type checking. Most instances of `PPLTypeChecker` are created + by wrapping Calcite's built-in type checkers. + +The following code snippet explains their relationships: + +```java +// For built-in Calcite operators +SqlOperandTypeChecker cosSqlTypeChecker = SqlStdOperatorTable.COS.getOperandTypeChecker(); // FamilyOperandTypeChecker(NUMERIC) + +// For user defined functions +// UDFOperandMetadata wraps a SqlOperandTypeChecker, so that the type information can be fed to a SqlUserDefinedFunction. 
+// Refer to the javadoc of UDFOperandMetadata class for more details on why this workaround is necessary +UDFOperandMetadata NUMERIC = UDFOperandMetadata.wrap((FamilyOperandTypeChecker) OperandTypes.NUMERIC); +SqlOperator COSH = + adaptMathFunctionToUDF( + "cosh", ReturnTypes.DOUBLE_FORCE_NULLABLE, NullPolicy.ANY, NUMERIC) + .toUDF("COSH"); +SqlOperandTypeChecker coshTypeChecker = COSH.getOperandTypeChecker().getInnerTypeChecker(); // FamilyOperandTypeChecker(NUMERIC) + +// SqlOperandTypeChecker works on parsed SQL trees, which don't exist in our architecture, so it cannot be directly +// applied to check operand types. We create another interface PPLTypeChecker to do the actual type checking. +// It works by retrieving operand type information from a SqlOperandTypeChecker, then checking against actual argument types. +PPLTypeChecker cosPplTypeChecker = PPLTypeChecker.wrapFamily(cosSqlTypeChecker); +// Equivalently, PPL type checkers can be created by directly specifying expected operand types +PPLTypeChecker numericTypeChecker = PPLTypeChecker.family(SqlTypeFamily.NUMERIC); +``` + +### Registering UDFs + +#### Preferred registration API + +UDFs should be registered in `PPLFuncImpTable`. The preferred API is: + +```java +AbstractBuilder:: + +registerOperator(BuiltinFunctionName functionName, SqlOperator... operators) +``` + +- It automatically extracts type checkers from operators and converts them to `PPLTypeChecker` instances +- Multiple implementations can be registered to the same function name for overloading +- The system will try to resolve functions based on argument types, with automatic coercion when needed + +For example, the following statement registers Calcite's built-in `COS` operator as the cosine function in PPL. Under the +hood, it first retrieves a `SqlOperandTypeChecker` from `SqlStdOperatorTable.COS`, then converts it to a `PPLTypeChecker`, +and finally registers it as the `cos` function in the PPL function registry.
+ +```java +registerOperator(COS, SqlStdOperatorTable.COS); +``` + +The following example shows how to register overloads for the same function name. The `+` operator is registered for both +number addition and string concatenation, controlled via type checkers. I.e. if both operands are numbers, they will +be resolved to `SqlStdOperatorTable.PLUS` since the operand types do not pass the type checking of +`SqlStdOperatorTable.CONCAT`, +which requires two strings. + +```java +registerOperator(ADD, SqlStdOperatorTable.PLUS, SqlStdOperatorTable.CONCAT); +``` + +#### Lower-level registration API + +```java +AbstractBuilder:: + +register(BuiltinFunctionName functionName, FunctionImp functionImp, PPLTypeChecker typeChecker) +``` + +Use this approach when: + +- You need a custom type checker +- You want to customize an existing function by tweaking its arguments +- Setting `typeChecker` to `null` will bypass type checking (use with caution) + +### External Functions + +Some function implementations depend on underlying data sources. They should be registered with +`PPLFuncImpTable::registerExternalOperator`. +For example, the `GEOIP` function relies on +the [opensearch-geospatial](https://github.com/opensearch-project/geospatial) plugin. It is registered as an external +function in `OpenSearchExecutionEngine`. + +### Testing UDFs + +Comprehensive testing is essential for UDFs: + +- Integration tests in `Calcite*IT` classes to verify function result correctness +- Unit tests in `CalcitePPLFunctionTypeTest` to validate type checker behavior +- Push-down tests in `CalciteExplainIT` if the function can be pushed down as a domain-specific language (DSL) + +## User-Defined Aggregation Functions (UDAFs) + +User-defined aggregation functions aggregate data across multiple rows. + +### Creating UDAFs + +There are two main approaches to creating a UDAF: + +#### 1.
Use existing Calcite aggregation operators + +Leverage existing aggregation operators from Calcite if they match your requirements. + +#### 2. Implement from scratch + +For custom aggregation logic: + +1. Extend `SqlUserDefinedAggFunction` with custom aggregation logic +2. Instantiate the new aggregation function in `PPLBuiltinOperators` + +### Registering UDAFs + +- Use `AggBuilder::registerOperator(BuiltinFunctionName functionName, SqlAggFunction aggFunction)` for standard + registration +- For more control, use + `AggBuilder::register(BuiltinFunctionName functionName, AggHandler aggHandler, PPLTypeChecker typeChecker)` +- For functions dependent on data engines, use `PPLFuncImpTable::registerExternalAggOperator` + +### Testing UDAFs + +- Verify result correctness in `CalcitePPLAggregationIT` +- Test logical plans in `CalcitePPLAggregationTest` \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index a4b3e1d569c..c0ec4bd59e5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -292,7 +292,8 @@ private void registerOpenSearchFunctions() { UserDefinedFunctionUtils.createUserDefinedAggFunction( DistinctCountApproxAggFunction.class, "APPROX_DISTINCT_COUNT", - ReturnTypes.BIGINT_FORCE_NULLABLE); + ReturnTypes.BIGINT_FORCE_NULLABLE, + null); PPLFuncImpTable.INSTANCE.registerExternalAggOperator( BuiltinFunctionName.DISTINCT_COUNT_APPROX, approxDistinctCountFunction); }