Skip to content

Commit c86055b

Browse files
Rename Bytes -> TextEncodingNone (#460)
* Rename Bytes -> TextEncodingNone * Update docs * add support for operator.eq and operator.ne for TextEncodingNone * change test_text to use heavydb_fixture * Address reviewer comments
1 parent 11602d7 commit c86055b

11 files changed

+301
-146
lines changed

doc/api.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Externals
6060

6161

6262
HeavyDB Backend
63-
=================
63+
===============
6464

6565
The table below contains the data structures available for the HeavyDB backend.
6666
It should be noted that the following types are not regular Python types but
@@ -74,3 +74,5 @@ one cannot create and use them inside the REPL, for instance.
7474
:toctree: generated/
7575

7676
rbc.heavyai.Array
77+
rbc.heavyai.TextEncodingDict
78+
rbc.heavyai.TextEncodingNone

rbc/heavyai/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from .array import * # noqa: F401, F403
22
from .column import * # noqa: F401, F403
33
from .buffer import * # noqa: F401, F403
4-
from .bytes import * # noqa: F401, F403
54
from .metatype import * # noqa: F401, F403
6-
from .text_encoding import * # noqa: F401, F403
75
from .pipeline import * # noqa: F401, F403
86
from .column_list import * # noqa: F401, F403
97
from .table_function_manager import * # noqa: F401, F403
8+
from .text_encoding_dict import * # noqa: F401, F403
9+
from .text_encoding_none import * # noqa: F401, F403
1010

1111
from . import mathimpl as math # noqa: F401
1212
from . import npyimpl as np # noqa: F401

rbc/heavyai/buffer.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ class HeavyDBBufferType(typesystem.Type):
4848
def pass_by_value(self):
4949
return False
5050

51+
@property
def numba_pointer_type(self):
    """Numba pointer type used to wrap this buffer type's structure.

    Subclasses may override this property to supply a more specific
    pointer type.
    """
    return BufferPointer
54+
5155
@classmethod
5256
def preprocess_args(cls, args):
5357
assert len(args) == 1, args
@@ -75,11 +79,11 @@ def tonumba(self, bool_is_int8=None):
7579
*extra_members
7680
)
7781
buffer_type._params['NumbaType'] = BufferType
78-
buffer_type._params['NumbaPointerType'] = BufferPointer
82+
buffer_type._params['NumbaPointerType'] = self.numba_pointer_type
7983
numba_type = buffer_type.tonumba(bool_is_int8=True)
8084
if self.pass_by_value:
8185
return numba_type
82-
return BufferPointer(numba_type)
86+
return self.numba_pointer_type(numba_type)
8387

8488

8589
class BufferType(types.Type):

rbc/heavyai/bytes.py

-53
This file was deleted.

rbc/heavyai/text_encoding.py rbc/heavyai/text_encoding_dict.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
'''HeavyDB Bytes type that corresponds to HeavyDB type TEXT ENCODED NONE.
1+
'''HeavyDB TextEncodingDict type that corresponds to HeavyDB type TEXT ENCODED DICT.
22
'''
33

44
__all__ = ['HeavyDBTextEncodingDictType', 'TextEncodingDict']
@@ -17,4 +17,8 @@ def __typesystem_type__(self):
1717

1818

1919
class TextEncodingDict(object, metaclass=HeavyDBMetaType):
    '''HeavyDB type matching the HeavyDB column type TEXT ENCODED DICT.

    A TextEncodingDict value behaves like an int32_t.
    '''

rbc/heavyai/text_encoding_none.py

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
'''HeavyDB TextEncodingNone type that corresponds to HeavyDB type TEXT ENCODED NONE.
2+
'''
3+
4+
__all__ = ['TextEncodingNonePointer', 'TextEncodingNone', 'HeavyDBTextEncodingNoneType']
5+
6+
import operator
7+
from rbc import typesystem
8+
from rbc.targetinfo import TargetInfo
9+
from rbc.errors import RequireLiteralValue
10+
from .buffer import (
11+
BufferPointer, Buffer, HeavyDBBufferType,
12+
heavydb_buffer_constructor)
13+
from numba.core import types, extending, cgutils
14+
from llvmlite import ir
15+
from typing import Union
16+
17+
18+
class HeavyDBTextEncodingNoneType(HeavyDBBufferType):
    """RBC typesystem type for HeavyDB TextEncodingNone values.

    The type is a buffer type whose element type is always ``char8`` and
    whose structure carries an extra ``is_null`` member.
    """

    @property
    def numba_pointer_type(self):
        """Numba pointer type used for values of this type."""
        return TextEncodingNonePointer

    @classmethod
    def preprocess_args(cls, args):
        """Return the type parameters; the element type is fixed to char8.

        The incoming ``args`` are not consulted.
        """
        char8 = typesystem.Type.fromstring('char8')
        return ((char8,),)

    @property
    def buffer_extra_members(self):
        """Extra structure members appended to the buffer structure."""
        return ('bool is_null',)

    def match(self, other):
        """Match this type against ``other``.

        Returns the result of comparing element types when ``other`` has
        the same Python type as ``self``, ``1`` when ``other`` is a pointer
        to an 8-bit char, ``2`` when ``other`` is a string type, and
        ``None`` otherwise.
        """
        if type(other) is type(self):
            return self[0] == other[0]
        is_char8_pointer = (
            other.is_pointer and other[0].is_char and other[0].bits == 8)
        if is_char8_pointer:
            return 1
        if other.is_string:
            return 2
        return None
42+
43+
44+
class TextEncodingNonePointer(BufferPointer):
    """Buffer pointer type that identifies TextEncodingNone values.

    It adds no behavior to BufferPointer; it exists so that
    TextEncodingNone arguments can be told apart by ``isinstance`` checks.
    """
46+
47+
48+
class TextEncodingNone(Buffer):
    '''HeavyDB TextEncodingNone type that corresponds to HeavyDB type TEXT ENCODED NONE.

    A TextEncodingNone value has the following structure:

    .. code-block:: c

        struct TextEncodingNone {
            char* ptr;
            size_t sz;  // when non-negative, TextEncodingNone has fixed width.
            int8_t is_null;
        }

    Creating a text buffer of a given size and filling it element by element:

    .. code-block:: python

        from rbc.heavyai import TextEncodingNone

        @heavydb('TextEncodingNone(int32, int32)')
        def make_abc(first, n):
            r = TextEncodingNone(n)
            for i in range(n):
                r[i] = first + i
            return r

    Creating a text buffer from a string literal:

    .. code-block:: python

        from rbc.heavyai import TextEncodingNone

        @heavydb('TextEncodingNone()')
        def make_text():
            return TextEncodingNone('some text here')

    '''

    def __init__(self, size: Union[int, str]):
        """Accept either a size (int) or a text (str); the body is empty."""
85+
86+
87+
@extending.overload(operator.eq)
def text_encoding_none_eq(a, b):
    """Overload ``a == b`` when ``a`` is a TextEncodingNone value.

    Two cases are supported:

    - ``b`` is a TextEncodingNone value: the values are equal when they
      have the same length and the same elements.
    - ``b`` is a string literal: the literal is converted to a
      TextEncodingNone value and compared as above.

    For any other combination no implementation is returned, so that
    other overloads may apply.
    """
    if not isinstance(a, TextEncodingNonePointer):
        return None

    if isinstance(b, TextEncodingNonePointer):

        def impl(a, b):
            n = len(a)
            if n != len(b):
                return False
            for i in range(n):
                if a[i] != b[i]:
                    return False
            return True
        return impl

    if isinstance(b, types.StringLiteral):
        lv = b.literal_value
        # The buffer elements are char8 and the literal is stored UTF-8
        # encoded, so the length to compare against is the number of
        # encoded bytes, not the number of code points (they differ for
        # non-ASCII text).
        sz = len(lv.encode('utf-8'))

        def impl(a, b):
            # Cheap length check first to avoid allocating a buffer for
            # the literal when the sizes already differ.
            if len(a) != sz:
                return False
            t = TextEncodingNone(lv)
            return a == t
        return impl

    return None
109+
110+
111+
@extending.overload(operator.ne)
def text_encoding_none_ne(a, b):
    """Overload ``a != b`` as the negation of ``a == b``.

    An implementation is provided only when ``a`` is a TextEncodingNone
    value and ``b`` is either a TextEncodingNone value or a string literal.
    """
    supported_rhs = (TextEncodingNonePointer, types.StringLiteral)
    if isinstance(a, TextEncodingNonePointer) and isinstance(b, supported_rhs):

        def impl(a, b):
            return not (a == b)
        return impl
118+
119+
120+
@extending.lower_builtin(TextEncodingNone, types.Integer)
def heavydb_text_encoding_none_constructor(context, builder, sig, args):
    """Lower ``TextEncodingNone(<integer>)``.

    The work is delegated unchanged to the generic buffer constructor.
    """
    buffer_value = heavydb_buffer_constructor(context, builder, sig, args)
    return buffer_value
123+
124+
125+
@extending.lower_builtin(TextEncodingNone, types.StringLiteral)
def heavydb_text_encoding_none_constructor_literal(context, builder, sig, args):
    """Lower ``TextEncodingNone(<string literal>)``.

    A buffer large enough for the UTF-8 encoding of the literal is
    created with the generic buffer constructor, and the encoded bytes
    are copied into it from a global constant.

    Returns the value produced by ``heavydb_buffer_constructor``.
    """
    int64_t = ir.IntType(64)
    int8_t_ptr = ir.IntType(8).as_pointer()

    literal_value = sig.args[0].literal_value
    msg_bytes = literal_value.encode('utf-8')
    # Size the buffer by the number of encoded bytes: for non-ASCII text
    # the UTF-8 encoding is longer than len(literal_value), and sizing by
    # code points would truncate the copied text.
    sz = int64_t(len(msg_bytes))

    # arr = {ptr, size, is_null}*
    arr = heavydb_buffer_constructor(context, builder, sig.return_type(types.int64), [sz])
    ptr = builder.extract_value(builder.load(arr), [0])

    msg_const = cgutils.make_bytearray(msg_bytes)
    msg_global_var = cgutils.global_constant(builder.module, f"Text({literal_value})", msg_const)
    msg_ptr = builder.bitcast(msg_global_var, int8_t_ptr)
    sizeof_char = TargetInfo().sizeof('char')
    cgutils.raw_memcpy(builder, ptr, msg_ptr, sz, sizeof_char)
    return arr
144+
145+
146+
@extending.type_callable(TextEncodingNone)
def type_heavydb_text_encoding_none(context):
    """Type the call ``TextEncodingNone(arg)``.

    The returned typer accepts an integer or a string literal argument
    and yields the Numba type of ``TextEncodingNone``. A non-literal
    unicode argument raises ``RequireLiteralValue``; any other argument
    type yields ``None``.
    """
    accepted = (types.Integer, types.StringLiteral)

    def typer(arg):
        if isinstance(arg, types.UnicodeType):
            raise RequireLiteralValue()
        if not isinstance(arg, accepted):
            return None
        return typesystem.Type.fromobject('TextEncodingNone').tonumba()

    return typer

rbc/heavydb.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from .thrift import Client as ThriftClient
1515
from . import heavyai
1616
from .heavyai import (
17-
HeavyDBArrayType, HeavyDBBytesType, HeavyDBTextEncodingDictType,
17+
HeavyDBArrayType, HeavyDBTextEncodingNoneType, HeavyDBTextEncodingDictType,
1818
HeavyDBOutputColumnType, HeavyDBColumnType,
1919
HeavyDBCompilerPipeline, HeavyDBCursorType,
2020
BufferMeta, HeavyDBColumnListType, HeavyDBTableFunctionManagerType)
@@ -399,6 +399,7 @@ def add(a, b):
399399
Constant='int32|sizer=Constant',
400400
PreFlight='int32|sizer=PreFlight',
401401
ColumnList='HeavyDBColumnListType',
402+
TextEncodingNone='HeavyDBTextEncodingNoneType',
402403
TextEncodingDict='HeavyDBTextEncodingDictType',
403404
TableFunctionManager='HeavyDBTableFunctionManagerType<>',
404405
UDTF='int32|kind=UDTF'
@@ -869,7 +870,7 @@ def _get_ext_arguments_map(self):
869870
'GeoPolygon': typemap['TExtArgumentType'].get('GeoPolygon'),
870871
'GeoMultiPolygon': typemap['TExtArgumentType'].get(
871872
'GeoMultiPolygon'),
872-
'Bytes': typemap['TExtArgumentType'].get('TextEncodingNone'),
873+
'TextEncodingNone': typemap['TExtArgumentType'].get('TextEncodingNone'),
873874
'TextEncodingDict': typemap['TExtArgumentType'].get('TextEncodingDict'),
874875
'ColumnList<bool>': typemap['TExtArgumentType'].get('ColumnListBool'),
875876
'ColumnList<int8_t>': typemap['TExtArgumentType'].get('ColumnListInt8'),
@@ -914,7 +915,8 @@ def _get_ext_arguments_map(self):
914915
ext_arguments_map['HeavyDBOutputColumnListType<%s>' % ptr_type] \
915916
= ext_arguments_map.get('ColumnList<%s>' % T)
916917

917-
ext_arguments_map['HeavyDBBytesType<char8>'] = ext_arguments_map.get('Bytes')
918+
ext_arguments_map['HeavyDBTextEncodingNoneType<char8>'] = \
919+
ext_arguments_map.get('TextEncodingNone')
918920

919921
values = list(ext_arguments_map.values())
920922
for v, n in thrift.TExtArgumentType._VALUES_TO_NAMES.items():
@@ -1366,8 +1368,8 @@ def format_type(self, typ: typesystem.Type):
13661368
elif isinstance(typ, HeavyDBCursorType):
13671369
p = tuple(map(self.format_type, typ[0]))
13681370
typ = typesystem.Type(('Cursor',) + p, **typ._params)
1369-
elif isinstance(typ, HeavyDBBytesType):
1370-
typ = typ.copy().params(typename='Bytes')
1371+
elif isinstance(typ, HeavyDBTextEncodingNoneType):
1372+
typ = typ.copy().params(typename='TextEncodingNone')
13711373
use_typename = True
13721374
elif isinstance(typ, HeavyDBTextEncodingDictType):
13731375
typ = typ.copy().params(typename='TextEncodingDict')
@@ -1412,7 +1414,7 @@ def remote_call(self, func, ftype: typesystem.Type, arguments: tuple, hold=False
14121414

14131415
if isinstance(atype, (HeavyDBColumnType, HeavyDBColumnListType)):
14141416
args.append(f'CURSOR({a})')
1415-
elif isinstance(atype, HeavyDBBytesType):
1417+
elif isinstance(atype, HeavyDBTextEncodingNoneType):
14161418
if isinstance(a, bytes):
14171419
a = repr(a.decode())
14181420
elif isinstance(a, str):

0 commit comments

Comments
 (0)