
Commit 182fb46

refactor
1 parent bc6e9e1 commit 182fb46

File tree

1 file changed: +48 -112 lines changed

python/pyspark/sql.py

Lines changed: 48 additions & 112 deletions
@@ -35,7 +35,26 @@
     "SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext", "SchemaRDD", "Row"]
 
 
+class DataType(object):
+    """Spark SQL DataType"""
+
+    def __repr__(self):
+        return self.__class__.__name__
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __eq__(self, other):
+        return (isinstance(other, self.__class__) and
+                self.__dict__ == other.__dict__)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
 class PrimitiveTypeSingleton(type):
+    """Metaclass for PrimitiveType"""
+
     _instances = {}
 
     def __call__(cls):
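Note (illustration, not part of the commit): the hunk above introduces a shared DataType base class with repr-based hashing and __dict__-based equality, and documents the existing PrimitiveTypeSingleton metaclass. The following standalone sketch shows the singleton pattern the metaclass implements; the body of __call__ is an assumption, since the diff only shows its signature and its return line.

class PrimitiveTypeSingleton(type):
    """Metaclass for PrimitiveType (sketch; __call__ body assumed)"""
    _instances = {}

    def __call__(cls):
        # cache one shared instance per class on first use
        if cls not in cls._instances:
            cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__()
        return cls._instances[cls]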
@@ -44,140 +63,91 @@ def __call__(cls):
         return cls._instances[cls]
 
 
-class StringType(object):
+class PrimitiveType(DataType):
+    """Spark SQL PrimitiveType"""
+
+    __metaclass__ = PrimitiveTypeSingleton
+
+
+class StringType(PrimitiveType):
     """Spark SQL StringType
 
     The data type representing string values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
 
-    def __repr__(self):
-        return "StringType"
 
-
-class BinaryType(object):
+class BinaryType(PrimitiveType):
     """Spark SQL BinaryType
 
     The data type representing bytearray values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "BinaryType"
 
 
-class BooleanType(object):
+class BooleanType(PrimitiveType):
     """Spark SQL BooleanType
 
     The data type representing bool values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
 
-    def __repr__(self):
-        return "BooleanType"
 
-
-class TimestampType(object):
+class TimestampType(PrimitiveType):
     """Spark SQL TimestampType
 
     The data type representing datetime.datetime values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "TimestampType"
 
 
-class DecimalType(object):
+class DecimalType(PrimitiveType):
     """Spark SQL DecimalType
 
     The data type representing decimal.Decimal values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
 
-    def __repr__(self):
-        return "DecimalType"
 
-
-class DoubleType(object):
+class DoubleType(PrimitiveType):
     """Spark SQL DoubleType
 
     The data type representing float values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "DoubleType"
 
 
-class FloatType(object):
+class FloatType(PrimitiveType):
     """Spark SQL FloatType
 
     The data type representing single precision floating-point values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "FloatType"
 
 
-class ByteType(object):
+class ByteType(PrimitiveType):
     """Spark SQL ByteType
 
     The data type representing int values with 1 singed byte.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "ByteType"
 
 
-class IntegerType(object):
+class IntegerType(PrimitiveType):
     """Spark SQL IntegerType
 
     The data type representing int values.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
 
-    def __repr__(self):
-        return "IntegerType"
 
-
-class LongType(object):
+class LongType(PrimitiveType):
     """Spark SQL LongType
 
     The data type representing long values. If the any value is beyond the range of
     [-9223372036854775808, 9223372036854775807], please use DecimalType.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "LongType"
 
 
-class ShortType(object):
+class ShortType(PrimitiveType):
     """Spark SQL ShortType
 
     The data type representing int values with 2 signed bytes.
-
     """
-    __metaclass__ = PrimitiveTypeSingleton
-
-    def __repr__(self):
-        return "ShortType"
 
 
-class ArrayType(object):
+class ArrayType(DataType):
     """Spark SQL ArrayType
 
     The data type representing list values.
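Note (illustration, assuming a PySpark build that contains this change): with the primitive types collapsed onto PrimitiveType, each one behaves as a singleton and its repr falls back to DataType.__repr__, i.e. the class name.

from pyspark.sql import StringType, IntegerType

# PrimitiveTypeSingleton caches one instance per class
assert StringType() is StringType()
assert IntegerType() is IntegerType()

# the per-class __repr__ methods are gone; DataType.__repr__ returns the class name
assert repr(StringType()) == "StringType"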
@@ -201,19 +171,12 @@ def __init__(self, elementType, containsNull=False):
         self.containsNull = containsNull
 
     def __repr__(self):
-        return "ArrayType(" + self.elementType.__repr__() + "," + \
-            str(self.containsNull).lower() + ")"
+        return "ArrayType(%r,%s)" % (self.elementType,
+                                     str(self.containsNull).lower())
 
-    def __eq__(self, other):
-        return (isinstance(other, self.__class__) and
-                self.elementType == other.elementType and
-                self.containsNull == other.containsNull)
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
 
 
-class MapType(object):
+class MapType(DataType):
     """Spark SQL MapType
 
     The data type representing dict values.
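Note (illustration under the same assumption): ArrayType now keeps only __repr__; equality and hashing come from DataType, so two instances with the same element type and containsNull flag still compare equal.

from pyspark.sql import ArrayType, StringType

a = ArrayType(StringType(), containsNull=False)
b = ArrayType(StringType(), containsNull=False)

assert a == b and hash(a) == hash(b)        # inherited from DataType
assert repr(a) == "ArrayType(StringType,false)"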
@@ -241,21 +204,11 @@ def __init__(self, keyType, valueType, valueContainsNull=True):
         self.valueContainsNull = valueContainsNull
 
     def __repr__(self):
-        return "MapType(" + self.keyType.__repr__() + "," + \
-            self.valueType.__repr__() + "," + \
-            str(self.valueContainsNull).lower() + ")"
-
-    def __eq__(self, other):
-        return (isinstance(other, self.__class__) and
-                self.keyType == other.keyType and
-                self.valueType == other.valueType and
-                self.valueContainsNull == other.valueContainsNull)
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
+        return "MapType(%r,%r,%s)" % (self.keyType, self.valueType,
+                                      str(self.valueContainsNull).lower())
 
 
-class StructField(object):
+class StructField(DataType):
     """Spark SQL StructField
 
     Represents a field in a StructType.
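Note (illustration under the same assumption): the MapType repr produced by the new format string, with valueContainsNull defaulting to True.

from pyspark.sql import MapType, StringType, IntegerType

m = MapType(StringType(), IntegerType())
assert repr(m) == "MapType(StringType,IntegerType,true)"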
@@ -281,21 +234,11 @@ def __init__(self, name, dataType, nullable):
         self.nullable = nullable
 
     def __repr__(self):
-        return "StructField(" + self.name + "," + \
-            self.dataType.__repr__() + "," + \
-            str(self.nullable).lower() + ")"
-
-    def __eq__(self, other):
-        return (isinstance(other, self.__class__) and
-                self.name == other.name and
-                self.dataType == other.dataType and
-                self.nullable == other.nullable)
+        return "StructField(%s,%r,%s)" % (self.name, self.dataType,
+                                          str(self.nullable).lower())
 
-    def __ne__(self, other):
-        return not self.__eq__(other)
 
-
-class StructType(object):
+class StructType(DataType):
     """Spark SQL StructType
 
     The data type representing namedtuple values.
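Note (illustration under the same assumption): StructField follows the same repr pattern; nullable has no default, so it must be passed explicitly.

from pyspark.sql import StructField, IntegerType

f = StructField("age", IntegerType(), True)
assert repr(f) == "StructField(age,IntegerType,true)"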
@@ -318,15 +261,8 @@ def __init__(self, fields):
         self.fields = fields
 
     def __repr__(self):
-        return "StructType(List(" + \
-            ",".join([field.__repr__() for field in self.fields]) + "))"
-
-    def __eq__(self, other):
-        return (isinstance(other, self.__class__) and
-                self.fields == other.fields)
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
+        return ("StructType(List(%s))" %
+                ",".join(repr(field) for field in self.fields))
 
 
 def _parse_datatype_list(datatype_list_string):
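Note (illustration under the same assumption): a nested schema shows how the new reprs compose into one string, which appears to be the format the _parse_datatype_* helpers below work with.

from pyspark.sql import StructType, StructField, StringType, IntegerType

schema = StructType([StructField("name", StringType(), False),
                     StructField("age", IntegerType(), True)])
assert repr(schema) == ("StructType(List(StructField(name,StringType,false),"
                        "StructField(age,IntegerType,true)))")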
