
Commit f2007f1

functions and types.

1 parent bc3b72b

2 files changed: +41 −119 lines changed

python/pyspark/sql/functions.py

Lines changed: 3 additions & 3 deletions
@@ -76,7 +76,7 @@ def _(col):
 
 
 def countDistinct(col, *cols):
-    """ Return a new Column for distinct count of `col` or `cols`
+    """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
 
     >>> df.agg(countDistinct(df.age, df.name).alias('c')).collect()
     [Row(c=2)]
@@ -91,7 +91,7 @@ def countDistinct(col, *cols):
 
 
 def approxCountDistinct(col, rsd=None):
-    """ Return a new Column for approximate distinct count of `col`
+    """Returns a new :class:`Column` for approximate distinct count of ``col``.
 
     >>> df.agg(approxCountDistinct(df.age).alias('c')).collect()
     [Row(c=2)]
@@ -142,7 +142,7 @@ def __call__(self, *cols):
 
 
 def udf(f, returnType=StringType()):
-    """Create a user defined function (UDF)
+    """Creates a :class:`Column` expression representing a user defined function (UDF).
 
     >>> from pyspark.sql.types import IntegerType
     >>> slen = udf(lambda s: len(s), IntegerType())
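The three docstrings above double as doctests. For context, a minimal usage sketch of the documented functions (not part of the commit; it assumes a running SparkContext `sc`, and the names `sqlContext` and `df` are illustrative only):

    from pyspark.sql import SQLContext
    from pyspark.sql.functions import countDistinct, approxCountDistinct, udf
    from pyspark.sql.types import IntegerType

    sqlContext = SQLContext(sc)
    df = sqlContext.createDataFrame([(2, 'Alice'), (5, 'Bob')], ['age', 'name'])

    # Exact and approximate distinct counts, as in the doctests above.
    df.agg(countDistinct(df.age, df.name).alias('c')).collect()   # [Row(c=2)]
    df.agg(approxCountDistinct(df.age).alias('c')).collect()      # [Row(c=2)]

    # udf() wraps a Python function into a Column expression usable in select().
    slen = udf(lambda s: len(s), IntegerType())
    df.select(slen(df.name).alias('slen')).collect()              # [Row(slen=5), Row(slen=3)]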

python/pyspark/sql/types.py

Lines changed: 38 additions & 116 deletions
@@ -33,8 +33,7 @@
 
 
 class DataType(object):
-
-    """Spark SQL DataType"""
+    """Base class for data types."""
 
     def __repr__(self):
         return self.__class__.__name__
@@ -67,7 +66,6 @@ def json(self):
 # This singleton pattern does not work with pickle, you will get
 # another object after pickle and unpickle
 class PrimitiveTypeSingleton(type):
-
     """Metaclass for PrimitiveType"""
 
     _instances = {}
@@ -79,66 +77,45 @@ def __call__(cls):
 
 
 class PrimitiveType(DataType):
-
     """Spark SQL PrimitiveType"""
 
     __metaclass__ = PrimitiveTypeSingleton
 
 
 class NullType(PrimitiveType):
+    """Null type.
 
-    """Spark SQL NullType
-
-    The data type representing None, used for the types which has not
-    been inferred.
+    The data type representing None, used for the types that cannot be inferred.
     """
 
 
 class StringType(PrimitiveType):
-
-    """Spark SQL StringType
-
-    The data type representing string values.
+    """String data type.
     """
 
 
 class BinaryType(PrimitiveType):
-
-    """Spark SQL BinaryType
-
-    The data type representing bytearray values.
+    """Binary (byte array) data type.
     """
 
 
 class BooleanType(PrimitiveType):
-
-    """Spark SQL BooleanType
-
-    The data type representing bool values.
+    """Boolean data type.
     """
 
 
 class DateType(PrimitiveType):
-
-    """Spark SQL DateType
-
-    The data type representing datetime.date values.
+    """Date (datetime.date) data type.
     """
 
 
 class TimestampType(PrimitiveType):
-
-    """Spark SQL TimestampType
-
-    The data type representing datetime.datetime values.
+    """Timestamp (datetime.datetime) data type.
     """
 
 
 class DecimalType(DataType):
-
-    """Spark SQL DecimalType
-
-    The data type representing decimal.Decimal values.
+    """Decimal (decimal.Decimal) data type.
     """
 
     def __init__(self, precision=None, scale=None):
def __init__(self, precision=None, scale=None):
@@ -166,80 +143,55 @@ def __repr__(self):
 
 
 class DoubleType(PrimitiveType):
-
-    """Spark SQL DoubleType
-
-    The data type representing float values.
+    """Double data type, representing double precision floats.
     """
 
 
 class FloatType(PrimitiveType):
-
-    """Spark SQL FloatType
-
-    The data type representing single precision floating-point values.
+    """Float data type, representing single precision floats.
     """
 
 
 class ByteType(PrimitiveType):
-
-    """Spark SQL ByteType
-
-    The data type representing int values with 1 singed byte.
+    """Byte data type, i.e. a signed integer in a single byte.
     """
     def simpleString(self):
         return 'tinyint'
 
 
 class IntegerType(PrimitiveType):
-
-    """Spark SQL IntegerType
-
-    The data type representing int values.
+    """Int data type, i.e. a signed 32-bit integer.
     """
     def simpleString(self):
         return 'int'
 
 
 class LongType(PrimitiveType):
+    """Long data type, i.e. a signed 64-bit integer.
 
-    """Spark SQL LongType
-
-    The data type representing long values. If the any value is
-    beyond the range of [-9223372036854775808, 9223372036854775807],
-    please use DecimalType.
+    If the values are beyond the range of [-9223372036854775808, 9223372036854775807],
+    please use :class:`DecimalType`.
     """
     def simpleString(self):
         return 'bigint'
 
 
 class ShortType(PrimitiveType):
-
-    """Spark SQL ShortType
-
-    The data type representing int values with 2 signed bytes.
+    """Short data type, i.e. a signed 16-bit integer.
     """
     def simpleString(self):
         return 'smallint'
 
 
 class ArrayType(DataType):
+    """Array data type.
 
-    """Spark SQL ArrayType
-
-    The data type representing list values. An ArrayType object
-    comprises two fields, elementType (a DataType) and containsNull (a bool).
-    The field of elementType is used to specify the type of array elements.
-    The field of containsNull is used to specify if the array has None values.
-
+    :param elementType: :class:`DataType` of each element in the array.
+    :param containsNull: boolean, whether the array can contain null (None) values.
     """
 
     def __init__(self, elementType, containsNull=True):
-        """Creates an ArrayType
-
-        :param elementType: the data type of elements.
-        :param containsNull: indicates whether the list contains None values.
-
+        """
         >>> ArrayType(StringType()) == ArrayType(StringType(), True)
         True
         >>> ArrayType(StringType(), False) == ArrayType(StringType())
@@ -268,29 +220,17 @@ def fromJson(cls, json):
 
 
 class MapType(DataType):
+    """Map data type.
 
-    """Spark SQL MapType
-
-    The data type representing dict values. A MapType object comprises
-    three fields, keyType (a DataType), valueType (a DataType) and
-    valueContainsNull (a bool).
-
-    The field of keyType is used to specify the type of keys in the map.
-    The field of valueType is used to specify the type of values in the map.
-    The field of valueContainsNull is used to specify if values of this
-    map has None values.
-
-    For values of a MapType column, keys are not allowed to have None values.
+    :param keyType: :class:`DataType` of the keys in the map.
+    :param valueType: :class:`DataType` of the values in the map.
+    :param valueContainsNull: indicates whether values can contain null (None) values.
 
+    Keys in a map data type are not allowed to be null (None).
     """
 
     def __init__(self, keyType, valueType, valueContainsNull=True):
-        """Creates a MapType
-        :param keyType: the data type of keys.
-        :param valueType: the data type of values.
-        :param valueContainsNull: indicates whether values contains
-        null values.
-
+        """
         >>> (MapType(StringType(), IntegerType())
         ...  == MapType(StringType(), IntegerType(), True))
         True
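As a side note on the new :param documentation for ArrayType and MapType, a short sketch of how the nullability flags behave (illustrative, not part of the commit):

    from pyspark.sql.types import ArrayType, MapType, StringType, IntegerType

    # containsNull defaults to True, as the doctests above verify.
    ArrayType(StringType()) == ArrayType(StringType(), containsNull=True)   # True

    # Map values may be None here, but map keys are never allowed to be None.
    m = MapType(StringType(), IntegerType(), valueContainsNull=True)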
@@ -325,30 +265,16 @@ def fromJson(cls, json):
 
 
 class StructField(DataType):
+    """A field in :class:`StructType`.
 
-    """Spark SQL StructField
-
-    Represents a field in a StructType.
-    A StructField object comprises three fields, name (a string),
-    dataType (a DataType) and nullable (a bool). The field of name
-    is the name of a StructField. The field of dataType specifies
-    the data type of a StructField.
-
-    The field of nullable specifies if values of a StructField can
-    contain None values.
-
+    :param name: string, name of the field.
+    :param dataType: :class:`DataType` of the field.
+    :param nullable: boolean, whether the field can be null (None) or not.
+    :param metadata: a dict from string to simple type that can be serialized to JSON automatically
     """
 
     def __init__(self, name, dataType, nullable=True, metadata=None):
-        """Creates a StructField
-        :param name: the name of this field.
-        :param dataType: the data type of this field.
-        :param nullable: indicates whether values of this field
-        can be null.
-        :param metadata: metadata of this field, which is a map from string
-        to simple type that can be serialized to JSON
-        automatically
-
+        """
         >>> (StructField("f1", StringType(), True)
         ...  == StructField("f1", StringType(), True))
         True
@@ -384,17 +310,13 @@ def fromJson(cls, json):
 
 
 class StructType(DataType):
+    """Struct type, consisting of a list of :class:`StructField`.
 
-    """Spark SQL StructType
-
-    The data type representing rows.
-    A StructType object comprises a list of L{StructField}.
-
+    This is the data type representing a :class:`Row`.
     """
 
     def __init__(self, fields):
-        """Creates a StructType
-
+        """
         >>> struct1 = StructType([StructField("f1", StringType(), True)])
         >>> struct2 = StructType([StructField("f1", StringType(), True)])
        >>> struct1 == struct2
@@ -425,9 +347,9 @@ def fromJson(cls, json):
 
 
 class UserDefinedType(DataType):
-    """
+    """User-defined type (UDT).
+
     .. note:: WARN: Spark Internal Use Only
-        SQL User-Defined Type (UDT).
     """
 
     @classmethod
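Taken together, the types documented in this commit compose into a row schema. A minimal sketch (not part of the commit; field names and metadata are illustrative only):

    from pyspark.sql.types import (StructType, StructField, StringType,
                                   IntegerType, ArrayType, MapType)

    schema = StructType([
        StructField("name", StringType(), nullable=False),
        StructField("age", IntegerType()),                   # nullable defaults to True
        StructField("tags", ArrayType(StringType(), containsNull=False),
                    metadata={"source": "example"}),         # JSON-serializable dict
        StructField("scores", MapType(StringType(), IntegerType())),
    ])

    # Schemas serialize to JSON and round-trip via StructType.fromJson(),
    # the classmethod visible in the hunk headers above.
    import json
    StructType.fromJson(json.loads(schema.json())) == schema   # True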

Comments (0)