feat: add median and count_distinct aggregation functions (#278)

substrait-io · Sep 5, 2022 · 9be62e5 · 9be62e5
1 parent 96b13d7
commit 9be62e5
Showing 1 changed file with 144 additions and 0 deletions.
diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml
@@ -1133,6 +1133,150 @@ aggregate_functions:
           - value: fp64
         nullability: DECLARED_OUTPUT
         return: fp64?
+  - name: "median"
+    description: >
+      Calculate the median for a set of values.
+
+      Returns null if applied to zero records. For the integer implementations,
+      the rounding option determines how the median should be rounded if it ends
+      up midway between two values. For the floating point implementations,
+      it specifies the usual floating point rounding mode.
+    impls:
+      # Integer implementations (i8, i16, i32, i64): the return type matches
+      # the input type, made nullable.
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+            required: true
+          - name: rounding
+            options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+            required: false
+          - value: i8
+        nullability: DECLARED_OUTPUT
+        return: i8?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+            required: true
+          - name: rounding
+            options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+            required: false
+          - value: i16
+        nullability: DECLARED_OUTPUT
+        return: i16?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+            required: true
+          - name: rounding
+            options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+            required: false
+          - value: i32
+        nullability: DECLARED_OUTPUT
+        return: i32?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+            required: true
+          - name: rounding
+            options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+            required: false
+          - value: i64
+        nullability: DECLARED_OUTPUT
+        return: i64?
+      # Floating point implementations (fp32, fp64): the return type matches
+      # the input type, made nullable.
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+            required: true
+          - name: rounding
+            options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+            required: false
+          - value: fp32
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+            required: true
+          - name: rounding
+            options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+            required: false
+          - value: fp64
+        nullability: DECLARED_OUTPUT
+        return: fp64?
+  - name: "count_distinct"
+    description: Count of unique values in a set of values.
+    impls:
+      - args:
+          # Optional enumeration argument, named like the other option
+          # arguments in this file.
+          # NOTE(review): presumably selects the behavior when the i64 count
+          # overflows - confirm the intended name and semantics.
+          - name: overflow
+            options: [ SILENT, SATURATE, ERROR ]
+            required: false
+          # Accepts a value of any type; the result is a non-nullable i64.
+          - value: any
+        nullability: DECLARED_OUTPUT
+        return: i64
 window_functions:
   - name: "row_number"
     description: "the number of the current row within its partition."