diff --git a/doc/source/io.rst b/doc/source/io.rst
index 6ed71a1d40690..3e9359743b7a4 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -2932,55 +2932,56 @@ if the source datatypes are compatible with BigQuery ones.
 
 For specifics on the service itself, see `here <https://developers.google.com/bigquery/>`__
 
 As an example, suppose you want to load all data from an existing table
-: `test_dataset.test_table`
-into BigQuery and pull it into a DataFrame.
+``test_dataset.test_table`` in BigQuery and pull it into a ``DataFrame``.
 
-.. code-block:: python
+::
 
     from pandas.io import gbq
     data_frame = gbq.read_gbq('SELECT * FROM test_dataset.test_table')
 
-The user will then be authenticated by the `bq` command line client -
+The user will then be authenticated by the ``bq`` command line client;
 this usually involves the default browser opening to a login page,
 though the process can be done entirely from command line if necessary.
-Datasets and additional parameters can be either configured with `bq`,
-passed in as options to `read_gbq`, or set using Google's gflags (this
-is not officially supported by this module, though care was taken
-to ensure that they should be followed regardless of how you call the
+Datasets and additional parameters can either be configured with ``bq``,
+passed in as options to :func:`~pandas.read_gbq`, or set using Google's
+``gflags`` (this is not officially supported by this module, though care was
+taken to ensure that they are respected regardless of how you call the
 method).
 
 Additionally, you can define which column to use as an index as well as
 a preferred column order as follows:
 
-.. code-block:: python
+::
 
     data_frame = gbq.read_gbq('SELECT * FROM test_dataset.test_table',
                               index_col='index_column_name',
                               col_order='[col1, col2, col3,...]')
 
-Finally, if you would like to create a BigQuery table, `my_dataset.my_table`, from the rows of DataFrame, `df`:
+Finally, if you would like to create a BigQuery table ``my_dataset.my_table``
+from the rows of the ``DataFrame`` ``df``:
 
-.. code-block:: python
+::
 
-    df = pandas.DataFrame({'string_col_name' : ['hello'],
-                           'integer_col_name' : [1],
-                           'boolean_col_name' : [True]})
+    df = pandas.DataFrame({'string_col_name': ['hello'],
+                           'integer_col_name': [1],
+                           'boolean_col_name': [True]})
     schema = ['STRING', 'INTEGER', 'BOOLEAN']
-    data_frame = gbq.to_gbq(df, 'my_dataset.my_table',
-                            if_exists='fail', schema = schema)
+    data_frame = gbq.to_gbq(df, 'my_dataset.my_table', if_exists='fail',
+                            schema=schema)
 
 To add more rows to this, simply:
 
-.. code-block:: python
+::
 
-    df2 = pandas.DataFrame({'string_col_name' : ['hello2'],
-                            'integer_col_name' : [2],
-                            'boolean_col_name' : [False]})
+    df2 = pandas.DataFrame({'string_col_name': ['hello2'],
+                            'integer_col_name': [2],
+                            'boolean_col_name': [False]})
     data_frame = gbq.to_gbq(df2, 'my_dataset.my_table', if_exists='append')
 
 .. note::
 
-    There is a hard cap on BigQuery result sets, at 128MB compressed. Also, the BigQuery SQL query language has some oddities,
-    see `here <https://developers.google.com/bigquery/query-reference>`__
+    There is a hard cap on BigQuery result sets, at 128MB compressed. Also, the
+    BigQuery SQL query language has some oddities, see `here
+    <https://developers.google.com/bigquery/query-reference>`__
 
 .. _io.stata:
 
diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst
index cee809da6719f..f953aeaa2a8a9 100644
--- a/doc/source/missing_data.rst
+++ b/doc/source/missing_data.rst
@@ -397,8 +397,11 @@ at the new values.
 
 .. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
 .. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
-Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword argument.
-Use this to limit the number of consecutive interpolations, keeping ``NaN`` s for interpolations that are too far from the last valid observation:
+
+Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword
+argument. Use this to limit the number of consecutive interpolations, keeping
+``NaN`` values for interpolations that are too far from the last valid
+observation:
 
 .. ipython:: python
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 335f772bbc04c..b10af7f909405 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1982,29 +1982,35 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
 
         Parameters
         ----------
-        method : {'linear', 'time', 'values', 'index' 'nearest',
-                  'zero', 'slinear', 'quadratic', 'cubic',
-                  'barycentric', 'krogh', 'polynomial', 'spline'
-                  'piecewise_polynomial', 'pchip'}
-            'linear': ignore the index and treat the values as equally spaced. default
-            'time': interpolation works on daily and higher resolution
+        method : {'linear', 'time', 'values', 'index', 'nearest', 'zero',
+                  'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
+                  'polynomial', 'spline', 'piecewise_polynomial', 'pchip'}
+
+            * 'linear': ignore the index and treat the values as equally
+              spaced. This is the default.
+            * 'time': interpolation works on daily and higher resolution
               data to interpolate given length of interval
-            'index': use the actual numerical values of the index
-            'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
-            'polynomial' is passed to `scipy.interpolate.interp1d` with the order given
-            both 'polynomial' and 'spline' requre that you also specify and order (int)
-            e.g. df.interpolate(method='polynomial', order=4)
-            'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all wrappers
-            around the scipy interpolation methods of similar names. See the
-            scipy documentation for more on their behavior:
-            http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
-            http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
+            * 'index': use the actual numerical values of the index
+            * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
+              'barycentric', 'polynomial' are passed to
+              `scipy.interpolate.interp1d` with the order given. Both
+              'polynomial' and 'spline' require that you also specify an
+              order (int), e.g. df.interpolate(method='polynomial', order=4)
+            * 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all
+              wrappers around the scipy interpolation methods of similar
+              names. See the scipy documentation for more on their behavior:
+              http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
+              http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
+
         axis : {0, 1}, default 0
-            0: fill column-by-column
-            1: fill row-by-row
-        limit : int, default None. Maximum number of consecutive NaNs to fill.
+            * 0: fill column-by-column
+            * 1: fill row-by-row
+        limit : int, default None
+            Maximum number of consecutive NaNs to fill.
         inplace : bool, default False
+            Update the NDFrame in place if possible.
         downcast : optional, 'infer' or None, defaults to 'infer'
+            Downcast dtypes if possible.
 
         Returns
         -------
 
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 1185e9514f7fc..f0d9dbe9c5877 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -391,8 +391,8 @@ def to_excel(self, path, na_rep='', engine=None, **kwargs):
         ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
         ``io.excel.xlsm.writer``.
 
-        Keyword Arguments
-        -----------------
+        Other Parameters
+        ----------------
         float_format : string, default None
             Format string for floating point numbers
         cols : sequence, optional
@@ -409,6 +409,8 @@ def to_excel(self, path, na_rep='', engine=None, **kwargs):
         startow : upper left cell row to dump data frame
         startcol : upper left cell column to dump data frame
 
+        Notes
+        -----
         Keyword arguments (and na_rep) are passed to the ``to_excel``
         method for each DataFrame written.
         """
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 96bedbf390af6..f3cfa3a16807a 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -782,7 +782,10 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
         latest information on table attributes for the modern web.
 
     parse_dates : bool, optional
-        See :func:`~pandas.read_csv` for details.
+        See :func:`~pandas.io.parsers.read_csv` for more details. In 0.13, this
+        parameter can sometimes interact strangely with ``infer_types``. If you
+        get a large number of ``NaT`` values in your results, consider passing
+        ``infer_types=False`` and manually converting types afterwards.
 
     tupleize_cols : bool, optional
         If ``False`` try to parse multiple header rows into a
@@ -824,12 +827,12 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
 
     See Also
    --------
-    pandas.read_csv
+    pandas.io.parsers.read_csv
     """
     if infer_types is not None:
        warnings.warn("infer_types will have no effect in 0.14", FutureWarning)
     else:
-        infer_types = True  # TODO: remove in 0.14
+        infer_types = True  # TODO: remove effect of this in 0.14
 
     # Type check here. We don't want to parse only to fail because of an
     # invalid value of an integer skiprows.
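
As a quick illustration of the ``limit`` and ``method``/``order`` behaviour
documented in the ``interpolate`` changes above, here is a minimal sketch
(assuming a 0.13-era pandas; the polynomial case additionally assumes scipy
is installed):

::

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, np.nan, np.nan, 5.0])

    # limit=2 fills at most two consecutive NaNs; the third gap value
    # stays NaN even though linear interpolation could reach it.
    s.interpolate(method='linear', limit=2)

    # 'polynomial' and 'spline' require an explicit order (int) and scipy.
    s.interpolate(method='polynomial', order=2)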