Skip to content

Commit 3a24f4f

Browse files
committed
[SPARK-16772] correct references to DataType
1 parent 9ade77c commit 3a24f4f

File tree

4 files changed

+11
-8
lines changed

4 files changed

+11
-8
lines changed

python/pyspark/sql/catalog.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def registerFunction(self, name, f, returnType=StringType()):
193193
194194
:param name: name of the UDF
195195
:param f: python function
196-
:param returnType: a :class:`DataType` object
196+
:param returnType: a :class:`pyspark.sql.types.DataType` object
197197
198198
>>> spark.catalog.registerFunction("stringLengthString", lambda x: len(x))
199199
>>> spark.sql("SELECT stringLengthString('test')").collect()

python/pyspark/sql/context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def registerFunction(self, name, f, returnType=StringType()):
184184
185185
:param name: name of the UDF
186186
:param f: python function
187-
:param returnType: a :class:`DataType` object
187+
:param returnType: a :class:`pyspark.sql.types.DataType` object
188188
189189
>>> sqlContext.registerFunction("stringLengthString", lambda x: len(x))
190190
>>> sqlContext.sql("SELECT stringLengthString('test')").collect()
@@ -226,8 +226,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
226226
from ``data``, which should be an RDD of :class:`Row`,
227227
or :class:`namedtuple`, or :class:`dict`.
228228
229-
When ``schema`` is :class:`DataType` or datatype string, it must match the real data, or
230-
exception will be thrown at runtime. If the given schema is not StructType, it will be
229+
When ``schema`` is :class:`pyspark.sql.types.DataType` or datatype string, it must match the real data, or
230+
an exception will be thrown at runtime. If the given schema is not StructType, it will be
231231
wrapped into a StructType as its only field, and the field name will be "value", each record
232232
will also be wrapped into a tuple, which can be converted to row later.
233233
@@ -236,7 +236,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
236236
237237
:param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean,
238238
etc.), or :class:`list`, or :class:`pandas.DataFrame`.
239-
:param schema: a :class:`DataType` or a datatype string or a list of column names, default
239+
:param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default
240240
is None. The data type string format equals `DataType.simpleString`, except that
241241
top level struct type can omit the `struct<>` and atomic types use `typeName()` as
242242
their format, e.g. use `byte` instead of `tinyint` for ByteType. We can also use `int`

python/pyspark/sql/functions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,6 +1781,9 @@ def udf(f, returnType=StringType()):
17811781
duplicate invocations may be eliminated or the function may even be invoked more times than
17821782
it is present in the query.
17831783
1784+
:param f: python function
1785+
:param returnType: a :class:`pyspark.sql.types.DataType` object
1786+
17841787
>>> from pyspark.sql.types import IntegerType
17851788
>>> slen = udf(lambda s: len(s), IntegerType())
17861789
>>> df.select(slen(df.name).alias('slen')).collect()

python/pyspark/sql/session.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
414414
from ``data``, which should be an RDD of :class:`Row`,
415415
or :class:`namedtuple`, or :class:`dict`.
416416
417-
When ``schema`` is :class:`DataType` or datatype string, it must match the real data, or
418-
exception will be thrown at runtime. If the given schema is not StructType, it will be
417+
When ``schema`` is :class:`pyspark.sql.types.DataType` or datatype string, it must match the real data, or
418+
an exception will be thrown at runtime. If the given schema is not StructType, it will be
419419
wrapped into a StructType as its only field, and the field name will be "value", each record
420420
will also be wrapped into a tuple, which can be converted to row later.
421421
@@ -424,7 +424,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
424424
425425
:param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean,
426426
etc.), or :class:`list`, or :class:`pandas.DataFrame`.
427-
:param schema: a :class:`DataType` or a datatype string or a list of column names, default
427+
:param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default
428428
is None. The data type string format equals `DataType.simpleString`, except that
429429
top level struct type can omit the `struct<>` and atomic types use `typeName()` as
430430
their format, e.g. use `byte` instead of `tinyint` for ByteType. We can also use `int`

0 commit comments

Comments
 (0)