@@ -883,27 +883,66 @@ def dot(self, other):
883883 @classmethod
884884 def from_dict (cls , data , orient = 'columns' , dtype = None , columns = None ):
885885 """
886- Construct DataFrame from dict of array-like or dicts
886+ Construct DataFrame from dict of array-like or dicts.
887+
888+ Creates DataFrame object from dictionary by columns or by index
889+ allowing dtype specification.
887890
888891 Parameters
889892 ----------
890893 data : dict
891- {field : array-like} or {field : dict}
894+ Of the form {field : array-like} or {field : dict}.
892895 orient : {'columns', 'index'}, default 'columns'
893896 The "orientation" of the data. If the keys of the passed dict
894897 should be the columns of the resulting DataFrame, pass 'columns'
895898 (default). Otherwise if the keys should be rows, pass 'index'.
896899 dtype : dtype, default None
897- Data type to force, otherwise infer
898- columns: list, default None
899- Column labels to use when orient='index'. Raises a ValueError
900- if used with orient='columns'
900+ Data type to force, otherwise infer.
901+ columns : list, default None
 902+ Column labels to use when ``orient='index'``. Raises a ValueError
 903+ if used with ``orient='columns'``.
901904
902905 .. versionadded:: 0.23.0
903906
904907 Returns
905908 -------
906- DataFrame
909+ pandas.DataFrame
910+
911+ See Also
912+ --------
913+ DataFrame.from_records : DataFrame from ndarray (structured
914+ dtype), list of tuples, dict, or DataFrame
915+ DataFrame : DataFrame object creation using constructor
916+
917+ Examples
918+ --------
919+ By default the keys of the dict become the DataFrame columns:
920+
921+ >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']}
922+ >>> pd.DataFrame.from_dict(data)
923+ col_1 col_2
924+ 0 3 a
925+ 1 2 b
926+ 2 1 c
927+ 3 0 d
928+
929+ Specify ``orient='index'`` to create the DataFrame using dictionary
930+ keys as rows:
931+
932+ >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']}
933+ >>> pd.DataFrame.from_dict(data, orient='index')
934+ 0 1 2 3
935+ row_1 3 2 1 0
936+ row_2 a b c d
937+
938+ When using the 'index' orientation, the column names can be
939+ specified manually:
940+
941+ >>> pd.DataFrame.from_dict(data, orient='index',
942+ ... columns=['A', 'B', 'C', 'D'])
943+ A B C D
944+ row_1 3 2 1 0
945+ row_2 a b c d
907946 """
908947 index = None
909948 orient = orient .lower ()
@@ -1209,20 +1248,68 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
12091248
12101249 def to_records (self , index = True , convert_datetime64 = True ):
12111250 """
1212- Convert DataFrame to record array. Index will be put in the
1213- 'index' field of the record array if requested
1251+ Convert DataFrame to a NumPy record array.
1252+
1253+ Index will be put in the 'index' field of the record array if
1254+ requested.
12141255
12151256 Parameters
12161257 ----------
12171258 index : boolean, default True
1218- Include index in resulting record array, stored in 'index' field
1259+ Include index in resulting record array, stored in 'index' field.
12191260 convert_datetime64 : boolean, default True
12201261 Whether to convert the index to datetime.datetime if it is a
1221- DatetimeIndex
1262+ DatetimeIndex.
12221263
12231264 Returns
12241265 -------
1225- y : recarray
1266+ y : numpy.recarray
1267+
1268+ See Also
1269+ --------
1270+ DataFrame.from_records: convert structured or record ndarray
1271+ to DataFrame.
1272+ numpy.recarray: ndarray that allows field access using
1273+ attributes, analogous to typed columns in a
1274+ spreadsheet.
1275+
1276+ Examples
1277+ --------
1278+ >>> df = pd.DataFrame({'A': [1, 2], 'B': [0.5, 0.75]},
1279+ ... index=['a', 'b'])
1280+ >>> df
1281+ A B
1282+ a 1 0.50
1283+ b 2 0.75
1284+ >>> df.to_records()
1285+ rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)],
1286+ dtype=[('index', 'O'), ('A', '<i8'), ('B', '<f8')])
1287+
1288+ The index can be excluded from the record array:
1289+
1290+ >>> df.to_records(index=False)
1291+ rec.array([(1, 0.5 ), (2, 0.75)],
1292+ dtype=[('A', '<i8'), ('B', '<f8')])
1293+
1294+ By default, timestamps are converted to `datetime.datetime`:
1295+
1296+ >>> df.index = pd.date_range('2018-01-01 09:00', periods=2, freq='min')
1297+ >>> df
1298+ A B
1299+ 2018-01-01 09:00:00 1 0.50
1300+ 2018-01-01 09:01:00 2 0.75
1301+ >>> df.to_records()
1302+ rec.array([(datetime.datetime(2018, 1, 1, 9, 0), 1, 0.5 ),
1303+ (datetime.datetime(2018, 1, 1, 9, 1), 2, 0.75)],
1304+ dtype=[('index', 'O'), ('A', '<i8'), ('B', '<f8')])
1305+
1306+ The timestamp conversion can be disabled so NumPy's datetime64
1307+ data type is used instead:
1308+
1309+ >>> df.to_records(convert_datetime64=False)
1310+ rec.array([('2018-01-01T09:00:00.000000000', 1, 0.5 ),
1311+ ('2018-01-01T09:01:00.000000000', 2, 0.75)],
1312+ dtype=[('index', '<M8[ns]'), ('A', '<i8'), ('B', '<f8')])
12261313 """
12271314 if index :
12281315 if is_datetime64_any_dtype (self .index ) and convert_datetime64 :
@@ -4722,20 +4809,90 @@ def melt(self, id_vars=None, value_vars=None, var_name=None,
47224809
47234810 def diff (self , periods = 1 , axis = 0 ):
47244811 """
4725- 1st discrete difference of object
4812+ First discrete difference of element.
4813+
4814+ Calculates the difference of a DataFrame element compared with another
4815+ element in the DataFrame (default is the element in the same column
4816+ of the previous row).
47264817
47274818 Parameters
47284819 ----------
47294820 periods : int, default 1
4730- Periods to shift for forming difference
4821+ Periods to shift for calculating difference, accepts negative
4822+ values.
47314823 axis : {0 or 'index', 1 or 'columns'}, default 0
47324824 Take difference over rows (0) or columns (1).
47334825
4734- .. versionadded:: 0.16.1
 4826+ .. versionadded:: 0.16.1
47354827
47364828 Returns
47374829 -------
47384830 diffed : DataFrame
4831+
4832+ See Also
4833+ --------
4834+ Series.diff: First discrete difference for a Series.
4835+ DataFrame.pct_change: Percent change over given number of periods.
4836+ DataFrame.shift: Shift index by desired number of periods with an
4837+ optional time freq.
4838+
4839+ Examples
4840+ --------
4841+ Difference with previous row
4842+
4843+ >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],
4844+ ... 'b': [1, 1, 2, 3, 5, 8],
4845+ ... 'c': [1, 4, 9, 16, 25, 36]})
4846+ >>> df
4847+ a b c
4848+ 0 1 1 1
4849+ 1 2 1 4
4850+ 2 3 2 9
4851+ 3 4 3 16
4852+ 4 5 5 25
4853+ 5 6 8 36
4854+
4855+ >>> df.diff()
4856+ a b c
4857+ 0 NaN NaN NaN
4858+ 1 1.0 0.0 3.0
4859+ 2 1.0 1.0 5.0
4860+ 3 1.0 1.0 7.0
4861+ 4 1.0 2.0 9.0
4862+ 5 1.0 3.0 11.0
4863+
4864+ Difference with previous column
4865+
4866+ >>> df.diff(axis=1)
4867+ a b c
4868+ 0 NaN 0.0 0.0
4869+ 1 NaN -1.0 3.0
4870+ 2 NaN -1.0 7.0
4871+ 3 NaN -1.0 13.0
4872+ 4 NaN 0.0 20.0
4873+ 5 NaN 2.0 28.0
4874+
4875+ Difference with 3rd previous row
4876+
4877+ >>> df.diff(periods=3)
4878+ a b c
4879+ 0 NaN NaN NaN
4880+ 1 NaN NaN NaN
4881+ 2 NaN NaN NaN
4882+ 3 3.0 2.0 15.0
4883+ 4 3.0 4.0 21.0
4884+ 5 3.0 6.0 27.0
4885+
4886+ Difference with following row
4887+
4888+ >>> df.diff(periods=-1)
4889+ a b c
4890+ 0 -1.0 0.0 -3.0
4891+ 1 -1.0 -1.0 -5.0
4892+ 2 -1.0 -1.0 -7.0
4893+ 3 -1.0 -2.0 -9.0
4894+ 4 -1.0 -3.0 -11.0
4895+ 5 NaN NaN NaN
47394896 """
47404897 bm_axis = self ._get_block_manager_axis (axis )
47414898 new_data = self ._data .diff (n = periods , axis = bm_axis )
@@ -5501,7 +5658,22 @@ def corr(self, method='pearson', min_periods=1):
55015658
55025659 def cov (self , min_periods = None ):
55035660 """
5504- Compute pairwise covariance of columns, excluding NA/null values
5661+ Compute pairwise covariance of columns, excluding NA/null values.
5662+
5663+ Compute the pairwise covariance among the series of a DataFrame.
5664+ The returned data frame is the `covariance matrix
5665+ <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
5666+ of the DataFrame.
5667+
5668+ Both NA and null values are automatically excluded from the
5669+ calculation. (See the note below about bias from missing values.)
5670+ A threshold can be set for the minimum number of
5671+ observations for each value created. Comparisons with observations
5672+ below this threshold will be returned as ``NaN``.
5673+
5674+ This method is generally used for the analysis of time series data to
5675+ understand the relationship between different measures
5676+ across time.
55055677
55065678 Parameters
55075679 ----------
@@ -5511,12 +5683,71 @@ def cov(self, min_periods=None):
55115683
55125684 Returns
55135685 -------
5514- y : DataFrame
5686+ DataFrame
5687+ The covariance matrix of the series of the DataFrame.
5688+
5689+ See Also
5690+ --------
5691+ pandas.Series.cov : compute covariance with another Series
 5692+ pandas.core.window.EWM.cov : exponential weighted sample covariance
5693+ pandas.core.window.Expanding.cov : expanding sample covariance
5694+ pandas.core.window.Rolling.cov : rolling sample covariance
55155695
55165696 Notes
55175697 -----
5518- `y` contains the covariance matrix of the DataFrame's time series.
5519- The covariance is normalized by N-1 (unbiased estimator).
5698+ Returns the covariance matrix of the DataFrame's time series.
5699+ The covariance is normalized by N-1.
5700+
5701+ For DataFrames that have Series that are missing data (assuming that
5702+ data is `missing at random
5703+ <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
5704+ the returned covariance matrix will be an unbiased estimate
5705+ of the variance and covariance between the member Series.
5706+
5707+ However, for many applications this estimate may not be acceptable
 5708+ because the estimated covariance matrix is not guaranteed to be positive
 5709+ semi-definite. This could lead to estimated correlations having
5710+ absolute values which are greater than one, and/or a non-invertible
5711+ covariance matrix. See `Estimation of covariance matrices
5712+ <http://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_
5713+ matrices>`__ for more details.
5714+
5715+ Examples
5716+ --------
5717+ >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
5718+ ... columns=['dogs', 'cats'])
5719+ >>> df.cov()
5720+ dogs cats
5721+ dogs 0.666667 -1.000000
5722+ cats -1.000000 1.666667
5723+
5724+ >>> np.random.seed(42)
5725+ >>> df = pd.DataFrame(np.random.randn(1000, 5),
5726+ ... columns=['a', 'b', 'c', 'd', 'e'])
5727+ >>> df.cov()
5728+ a b c d e
5729+ a 0.998438 -0.020161 0.059277 -0.008943 0.014144
5730+ b -0.020161 1.059352 -0.008543 -0.024738 0.009826
5731+ c 0.059277 -0.008543 1.010670 -0.001486 -0.000271
5732+ d -0.008943 -0.024738 -0.001486 0.921297 -0.013692
5733+ e 0.014144 0.009826 -0.000271 -0.013692 0.977795
5734+
5735+ **Minimum number of periods**
5736+
5737+ This method also supports an optional ``min_periods`` keyword
5738+ that specifies the required minimum number of non-NA observations for
5739+ each column pair in order to have a valid result:
5740+
5741+ >>> np.random.seed(42)
5742+ >>> df = pd.DataFrame(np.random.randn(20, 3),
5743+ ... columns=['a', 'b', 'c'])
5744+ >>> df.loc[df.index[:5], 'a'] = np.nan
5745+ >>> df.loc[df.index[5:10], 'b'] = np.nan
5746+ >>> df.cov(min_periods=12)
5747+ a b c
5748+ a 0.316741 NaN -0.150812
5749+ b NaN 1.248003 0.191417
5750+ c -0.150812 0.191417 0.895202
55205751 """
55215752 numeric_df = self ._get_numeric_data ()
55225753 cols = numeric_df .columns
0 commit comments