Skip to content

Commit 3a24f4f

Browse files
committed
[SPARK-16772] correct references to DataType
1 parent 9ade77c commit 3a24f4f

File tree

4 files changed

+11
-8
lines changed

4 files changed

+11
-8
lines changed

python/pyspark/sql/catalog.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def registerFunction(self, name, f, returnType=StringType()):
193193
194194
:param name: name of the UDF
195195
:param f: python function
196-
:param returnType: a :class:`DataType` object
196+
:param returnType: a :class:`pyspark.sql.types.DataType` object
197197
198198
>>> spark.catalog.registerFunction("stringLengthString", lambda x: len(x))
199199
>>> spark.sql("SELECT stringLengthString('test')").collect()

python/pyspark/sql/context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def registerFunction(self, name, f, returnType=StringType()):
184184
185185
:param name: name of the UDF
186186
:param f: python function
187-
:param returnType: a :class:`DataType` object
187+
:param returnType: a :class:`pyspark.sql.types.DataType` object
188188
189189
>>> sqlContext.registerFunction("stringLengthString", lambda x: len(x))
190190
>>> sqlContext.sql("SELECT stringLengthString('test')").collect()
@@ -226,8 +226,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
226226
from ``data``, which should be an RDD of :class:`Row`,
227227
or :class:`namedtuple`, or :class:`dict`.
228228
229-
When ``schema`` is :class:`DataType` or datatype string, it must match the real data, or
230-
exception will be thrown at runtime. If the given schema is not StructType, it will be
229+
When ``schema`` is :class:`pyspark.sql.types.DataType` or datatype string, it must match the real data, or
230+
an exception will be thrown at runtime. If the given schema is not StructType, it will be
231231
wrapped into a StructType as its only field, and the field name will be "value", each record
232232
will also be wrapped into a tuple, which can be converted to row later.
233233
@@ -236,7 +236,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
236236
237237
:param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean,
238238
etc.), or :class:`list`, or :class:`pandas.DataFrame`.
239-
:param schema: a :class:`DataType` or a datatype string or a list of column names, default
239+
:param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default
240240
is None. The data type string format equals `DataType.simpleString`, except that
241241
top level struct type can omit the `struct<>` and atomic types use `typeName()` as
242242
their format, e.g. use `byte` instead of `tinyint` for ByteType. We can also use `int`

python/pyspark/sql/functions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,6 +1781,9 @@ def udf(f, returnType=StringType()):
17811781
duplicate invocations may be eliminated or the function may even be invoked more times than
17821782
it is present in the query.
17831783
1784+
:param f: python function
1785+
:param returnType: a :class:`pyspark.sql.types.DataType` object
1786+
17841787
>>> from pyspark.sql.types import IntegerType
17851788
>>> slen = udf(lambda s: len(s), IntegerType())
17861789
>>> df.select(slen(df.name).alias('slen')).collect()

python/pyspark/sql/session.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
414414
from ``data``, which should be an RDD of :class:`Row`,
415415
or :class:`namedtuple`, or :class:`dict`.
416416
417-
When ``schema`` is :class:`DataType` or datatype string, it must match the real data, or
418-
exception will be thrown at runtime. If the given schema is not StructType, it will be
417+
When ``schema`` is :class:`pyspark.sql.types.DataType` or datatype string, it must match the real data, or
418+
an exception will be thrown at runtime. If the given schema is not StructType, it will be
419419
wrapped into a StructType as its only field, and the field name will be "value", each record
420420
will also be wrapped into a tuple, which can be converted to row later.
421421
@@ -424,7 +424,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
424424
425425
:param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean,
426426
etc.), or :class:`list`, or :class:`pandas.DataFrame`.
427-
:param schema: a :class:`DataType` or a datatype string or a list of column names, default
427+
:param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default
428428
is None. The data type string format equals `DataType.simpleString`, except that
429429
top level struct type can omit the `struct<>` and atomic types use `typeName()` as
430430
their format, e.g. use `byte` instead of `tinyint` for ByteType. We can also use `int`

0 commit comments

Comments
 (0)