1616// under the License.
1717use std:: sync:: Arc ;
1818
19- use crate :: executor:: WkbExecutor ;
19+ use crate :: executor:: WkbBytesExecutor ;
2020use arrow_array:: builder:: StringBuilder ;
2121use arrow_schema:: DataType ;
2222use datafusion_common:: error:: Result ;
2323use datafusion_expr:: {
2424 scalar_doc_sections:: DOC_SECTION_OTHER , ColumnarValue , Documentation , Volatility ,
2525} ;
26- use geo_traits:: GeometryTrait ;
2726use sedona_common:: sedona_internal_err;
2827use sedona_expr:: scalar_udf:: { SedonaScalarKernel , SedonaScalarUDF } ;
2928use sedona_schema:: { datatypes:: SedonaType , matchers:: ArgMatcher } ;
30- use wkb:: reader:: Wkb ;
3129
3230pub fn st_geometry_type_udf ( ) -> SedonaScalarUDF {
3331 SedonaScalarUDF :: new (
@@ -67,16 +65,16 @@ impl SedonaScalarKernel for STGeometryType {
6765 arg_types : & [ SedonaType ] ,
6866 args : & [ ColumnarValue ] ,
6967 ) -> Result < ColumnarValue > {
70- let executor = WkbExecutor :: new ( arg_types, args) ;
71- let min_output_size = "POINT " . len ( ) * executor. num_iterations ( ) ;
68+ let executor = WkbBytesExecutor :: new ( arg_types, args) ;
69+ let min_output_size = "ST_POINT " . len ( ) * executor. num_iterations ( ) ;
7270 let mut builder = StringBuilder :: with_capacity ( executor. num_iterations ( ) , min_output_size) ;
7371
74- // We can do quite a lot better than this with some vectorized WKB processing,
75- // but for now we just do a slow iteration
76- executor . execute_wkb_void ( |maybe_item| {
77- match maybe_item {
78- Some ( item ) => {
79- builder. append_option ( invoke_scalar ( & item ) ? ) ;
72+ // Iterate over raw WKB bytes for faster type inference
73+ executor . execute_wkb_void ( |maybe_bytes| {
74+ match maybe_bytes {
75+ Some ( bytes ) => {
76+ let name = infer_geometry_type_name ( bytes ) ? ;
77+ builder. append_value ( name ) ;
8078 }
8179 None => builder. append_null ( ) ,
8280 }
@@ -87,20 +85,33 @@ impl SedonaScalarKernel for STGeometryType {
8785 }
8886}
8987
90- fn invoke_scalar ( item : & Wkb ) -> Result < Option < String > > {
91- match item. as_type ( ) {
92- geo_traits:: GeometryType :: Point ( _) => Ok ( Some ( "ST_Point" . to_string ( ) ) ) ,
93- geo_traits:: GeometryType :: LineString ( _) => Ok ( Some ( "ST_LineString" . to_string ( ) ) ) ,
94- geo_traits:: GeometryType :: Polygon ( _) => Ok ( Some ( "ST_Polygon" . to_string ( ) ) ) ,
95- geo_traits:: GeometryType :: MultiPoint ( _) => Ok ( Some ( "ST_MultiPoint" . to_string ( ) ) ) ,
96- geo_traits:: GeometryType :: MultiLineString ( _) => Ok ( Some ( "ST_MultiLineString" . to_string ( ) ) ) ,
97- geo_traits:: GeometryType :: MultiPolygon ( _) => Ok ( Some ( "ST_MultiPolygon" . to_string ( ) ) ) ,
98- geo_traits:: GeometryType :: GeometryCollection ( _) => {
99- Ok ( Some ( "ST_GeometryCollection" . to_string ( ) ) )
100- }
101-
102- // Other geometry types in geo that we should not get here: Rect, Triangle, Line
103- _ => sedona_internal_err ! ( "unexpected geometry type" ) ,
88+ /// Fast-path inference of geometry type name from raw WKB bytes
89+ /// An error will be thrown for invalid WKB bytes input
90+ ///
91+ /// Spec: https://libgeos.org/specifications/wkb/
92+ #[ inline]
93+ fn infer_geometry_type_name ( buf : & [ u8 ] ) -> Result < & ' static str > {
94+ if buf. len ( ) < 5 {
95+ return sedona_internal_err ! ( "Invalid WKB: buffer too small ({} bytes)" , buf. len( ) ) ;
96+ }
97+
98+ let byte_order = buf[ 0 ] ;
99+ let code = match byte_order {
100+ 0 => u32:: from_be_bytes ( [ buf[ 1 ] , buf[ 2 ] , buf[ 3 ] , buf[ 4 ] ] ) ,
101+ 1 => u32:: from_le_bytes ( [ buf[ 1 ] , buf[ 2 ] , buf[ 3 ] , buf[ 4 ] ] ) ,
102+ other => return sedona_internal_err ! ( "Unexpected byte order: {other}" ) ,
103+ } ;
104+
105+ // Only low 3 bits is for the base type, high bits include additional info
106+ match code & 0x7 {
107+ 1 => Ok ( "ST_Point" ) ,
108+ 2 => Ok ( "ST_LineString" ) ,
109+ 3 => Ok ( "ST_Polygon" ) ,
110+ 4 => Ok ( "ST_MultiPoint" ) ,
111+ 5 => Ok ( "ST_MultiLineString" ) ,
112+ 6 => Ok ( "ST_MultiPolygon" ) ,
113+ 7 => Ok ( "ST_GeometryCollection" ) ,
114+ _ => sedona_internal_err ! ( "WKB type code out of range. Got: {}" , code) ,
104115 }
105116}
106117
0 commit comments