From 879ec63774a2a631af8a96277b7046d7b5007c52 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Sun, 24 Nov 2019 23:59:52 +0100
Subject: [PATCH 01/47] Add new getting started tutorials to pandas
---
doc/source/conf.py | 4 +
doc/source/getting_started/index.rst | 1 +
.../intro_tutorials/01_table_oriented.rst | 218 ++++++++++
.../intro_tutorials/02_read_write.rst | 227 ++++++++++
.../intro_tutorials/03_subset_data.rst | 405 ++++++++++++++++++
.../intro_tutorials/04_plotting.rst | 244 +++++++++++
.../intro_tutorials/05_add_columns.rst | 186 ++++++++
.../06_calculate_statistics.rst | 310 ++++++++++++++
.../07_reshape_table_layout.rst | 402 +++++++++++++++++
.../intro_tutorials/08_combine_dataframes.rst | 323 ++++++++++++++
.../intro_tutorials/09_timeseries.rst | 382 +++++++++++++++++
.../intro_tutorials/10_text_data.rst | 278 ++++++++++++
.../getting_started/intro_tutorials/index.rst | 22 +
13 files changed, 3002 insertions(+)
create mode 100644 doc/source/getting_started/intro_tutorials/01_table_oriented.rst
create mode 100644 doc/source/getting_started/intro_tutorials/02_read_write.rst
create mode 100644 doc/source/getting_started/intro_tutorials/03_subset_data.rst
create mode 100644 doc/source/getting_started/intro_tutorials/04_plotting.rst
create mode 100644 doc/source/getting_started/intro_tutorials/05_add_columns.rst
create mode 100644 doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
create mode 100644 doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
create mode 100644 doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
create mode 100644 doc/source/getting_started/intro_tutorials/09_timeseries.rst
create mode 100644 doc/source/getting_started/intro_tutorials/10_text_data.rst
create mode 100644 doc/source/getting_started/intro_tutorials/index.rst
diff --git a/doc/source/conf.py b/doc/source/conf.py
index b4f719b6e64b2..dd699b442eda9 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -225,6 +225,10 @@
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
+html_css_files = [
+ 'css/getting_started.css',
+]
+
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 34bb4f930f175..1a8cb242a110e 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -15,6 +15,7 @@ Getting started
install
overview
10min
+ intro_tutorials/index
basics
dsintro
comparison/index
diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
new file mode 100644
index 0000000000000..ea9a50987400c
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
@@ -0,0 +1,218 @@
+.. _10min_tut_01_tableoriented:
+
+{{ header }}
+
+Pandas is table oriented
+------------------------
+
+.. raw:: html
+
+
+
+
+I want to start using pandas
+
+.. ipython:: python
+
+ import pandas as pd
+
+To load the pandas package and start working with it, import the
+package. The community-agreed alias for pandas is ``pd``, so loading
+pandas as ``pd`` is assumed standard practice for all of the pandas
+documentation.
+
+.. raw:: html
+
+
+
+
+Pandas data table representation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/01_table_dataframe.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want to store passenger data of the Titanic. For a number of passengers, I know the name (characters), age (integers) and sex (male/female) data.
+
+.. ipython:: python
+
+ df = pd.DataFrame({
+ "Name": ["Braund, Mr. Owen Harris",
+ "Allen, Mr. William Henry",
+ "Bonnell, Miss. Elizabeth"],
+ "Age": [22, 35, 58],
+ "Sex": ["male", "male", "female"]}
+ )
+ df
+
+To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and
+the values in each list as rows of the ``DataFrame``.
+
+.. raw:: html
+
+
+
+
+A ``DataFrame`` is a 2-dimensional data structure that can store data of
+different types (including characters, integers, floating point values,
+categorical data and more) in columns. It is similar to a spreadsheet, a
+SQL table or the ``data.frame`` in R.
+
+- The table has 3 columns, each of them with a column label. The column
+ labels are respectively ``Name``, ``Age`` and ``Sex``.
+- The column ``Name`` consists of textual data with each value a
+ string, the column ``Age`` consists of numbers and the column ``Sex`` is
+ textual data.
+
+In spreadsheet software, the table representation of our data would look
+very similar:
+
+.. image:: ../../_static/schemas/01_table_spreadsheet.png
+ :align: center
+
+Each column in a ``DataFrame`` is a ``Series``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/01_table_series.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I’m just interested in working with the data in the column ``Age``
+
+.. ipython:: python
+
+ df["Age"]
+
+When selecting a single column of a pandas ``DataFrame``, the result is
+a pandas ``Series``. To select the column, use the column label in
+between square brackets ``[]``.
+
+.. raw:: html
+
+
+
+
+.. note::
+ If you are familiar with Python
+ :ref:`dictionaries `, the selection of a
+ single column is very similar to the selection of dictionary values based on
+ the key.
+
+You can create a ``Series`` from scratch as well:
+
+.. ipython:: python
+
+ ages = pd.Series([22, 35, 58], name="Age")
+ ages
+
+A pandas ``Series`` has no column labels, as it is just a single column
+of a ``DataFrame``. A Series does have row labels.
+
+Do something with a DataFrame or Series
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+
+
+
+I want to know the maximum Age of the passengers
+
+We can do this on the ``DataFrame`` by selecting the ``Age`` column and
+applying ``max()``:
+
+.. ipython:: python
+
+ df["Age"].max()
+
+Or to the ``Series``:
+
+.. ipython:: python
+
+ ages.max()
+
+.. raw:: html
+
+
+
+
+As illustrated by the ``max()`` method, you can *do* things with a
+``DataFrame`` or ``Series``. Pandas provides a lot of functionalities,
+each of them a *method* you can apply to a ``DataFrame`` or ``Series``.
+As methods are functions, do not forget to use parentheses ``()``.
+
+.. raw:: html
+
+
+
+
+I’m interested in some basic statistics of the numerical data of my data table
+
+.. ipython:: python
+
+ df.describe()
+
+The ``describe`` method provides a quick overview of the numerical data in
+a ``DataFrame``. As the ``Name`` and ``Sex`` columns are textual data,
+these are by default not taken into account by the ``describe`` method.
+
+.. raw:: html
+
+
+
+
+Many pandas operations return a ``DataFrame`` or a ``Series``. The
+``describe`` method is an example of a pandas operation returning a
+pandas ``Series`` or a pandas ``DataFrame``.
+
+.. raw:: html
+
+
+ To user guide
+
+Check more options on ``describe`` in the user guide at :ref:`basics.describe`
+
+.. raw:: html
+
+
+
+.. note::
+ This is just a starting point. Similar to spreadsheet
+ software, pandas represents data as a table with columns and rows. Apart
+ from the representation, also the data manipulations and calculations
+ you would do in spreadsheet software are supported by pandas. Continue
+ reading the next tutorials to get started!
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- Import the package, aka ``import pandas as pd``
+- A table of data is stored as a pandas ``DataFrame``
+- Each column in a ``DataFrame`` is a ``Series``
+- You can do things by applying a method to a ``DataFrame`` or ``Series``
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+A more extended introduction to ``DataFrame`` and ``Series`` is provided in :ref:`dsintro`.
+
+.. raw:: html
+
+
\ No newline at end of file
diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst
new file mode 100644
index 0000000000000..034ad20ea8f82
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst
@@ -0,0 +1,227 @@
+.. _10min_tut_02_read_write:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Titanic data
+
+
+
+
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has value 0 or 1: 0 for not survived and 1
+ for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Indication whether the passenger has siblings and/or a spouse aboard.
+- Parch: Whether a passenger is alone or has family aboard.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+
+
+How do I read and write tabular data?
+=====================================
+
+.. image:: ../../_static/schemas/02_io_readwrite.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want to analyse the titanic passenger data, available as a CSV file.
+
+.. ipython:: python
+
+ titanic = pd.read_csv("data/titanic.csv")
+
+pandas provides the ``read_csv`` function to read data stored as a csv
+file into a pandas ``DataFrame``. Pandas supports many different file
+formats or data sources out of the box (csv, excel, sql, json, parquet,
+…), each of them with the prefix ``read_*``.
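+
+A quick way to list the available readers (a small illustration using plain
+Python introspection, not specific to the titanic data):
+
+.. ipython:: python
+
+ [func_name for func_name in dir(pd) if func_name.startswith("read_")]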
+
+.. raw:: html
+
+
+
+
+Make sure to always check the data after reading it in. When displaying
+a ``DataFrame``, the first and last 5 rows will be shown by default:
+
+.. ipython:: python
+
+ titanic
+
+.. raw:: html
+
+
+
+
+I want to see the first 8 rows of a pandas DataFrame.
+
+.. ipython:: python
+
+ titanic.head(8)
+
+To see the first N rows of a ``DataFrame``, use the ``head`` method with
+the required number of rows (in this case 8) as argument.
+
+.. raw:: html
+
+
+
+
+.. note::
+
+ Interested in the last N rows instead? Pandas also provides a
+ ``tail`` method. For example, ``titanic.tail(10)`` will return the last
+ 10 rows of the DataFrame.
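+
+As a quick illustration of that note, the same call on the ``titanic`` table
+loaded above:
+
+.. ipython:: python
+
+ titanic.tail(10)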
+
+A check on how pandas interpreted each of the column data types can be
+done by requesting the pandas ``dtypes`` attribute:
+
+.. ipython:: python
+
+ titanic.dtypes
+
+For each of the columns, the data type used is listed. The data types
+in this ``DataFrame`` are integers (``int64``), floats (``float64``) and
+strings (``object``).
+
+.. note::
+ When asking for the ``dtypes``, no brackets are used!
+ ``dtypes`` is an attribute of a ``DataFrame`` and ``Series``. Attributes
+ of a ``DataFrame`` or ``Series`` do not need brackets. Attributes
+ represent a characteristic of a ``DataFrame``/``Series``, whereas a
+ method (which requires brackets) *does* something with the
+ ``DataFrame``/``Series`` as introduced in the
+ :ref:`first tutorial <10min_tut_01_tableoriented>`.
+
+.. raw:: html
+
+
+
+
+My colleague requested the titanic data as a spreadsheet.
+
+.. ipython:: python
+
+ titanic.to_excel('titanic.xlsx', sheet_name='passengers', index=False)
+
+Whereas ``read_*`` functions are used to read data into pandas, the
+``to_*`` methods are used to store data. The ``to_excel`` method stores
+the data as an Excel file. In the example here, the ``sheet_name`` is
+set to *passengers* instead of the default *Sheet1*. By setting
+``index=False`` the row index labels are not saved in the spreadsheet.
+
+.. raw:: html
+
+
+
+
+The equivalent read function ``read_excel`` would reload the data into a
+``DataFrame``:
+
+.. ipython:: python
+
+ titanic = pd.read_excel('titanic.xlsx', sheet_name='passengers')
+
+.. ipython:: python
+
+ titanic.head()
+
+.. raw:: html
+
+
+
+
+I’m interested in a technical summary of a ``DataFrame``
+
+.. ipython:: python
+
+ titanic.info()
+
+
+The method ``info`` provides a lot of technical information about the
+``DataFrame``, so let’s explain the output in more detail:
+
+- It is indeed a ``DataFrame``.
+- There are 891 entries, i.e. 891 rows.
+- Each row has a row label (aka the ``index``) with values ranging from
+ 0 to 890.
+- The table has 12 columns. Most columns have a value for each of the
+ rows (all 891 values are ``non-null``). Some columns do have missing
+ values and fewer than 891 ``non-null`` values.
+- The columns ``Name``, ``Sex``, ``Cabin`` and ``Embarked`` consist of
+ textual data (strings, aka ``object``). The other columns are
+ numerical data, some of them whole numbers (aka ``integer``) and
+ others real numbers (aka ``float``).
+- The kind of data (characters, integers,…) in the different columns
+ is summarized by listing the ``dtypes``.
+- The approximate amount of RAM used to hold the DataFrame is provided
+ as well.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- Getting data into pandas from many different file formats or data
+ sources is supported by ``read_*`` functions.
+- Exporting data out of pandas is provided by different
+ ``to_*`` methods.
+- The ``head``/``tail``/``info`` methods and the ``dtypes`` attribute
+ are convenient for a first check.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+For a complete overview of the input and output possibilities from and to pandas, see :ref:`io`
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
new file mode 100644
index 0000000000000..97ce7eb4e7e52
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
@@ -0,0 +1,405 @@
+.. _10min_tut_03_subset:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Titanic data
+
+
+
+
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has value 0 or 1: 0 for not survived and 1
+ for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Indication whether the passenger has siblings and/or a spouse aboard.
+- Parch: Whether a passenger is alone or has family aboard.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+
+
+How do I select a subset of data in a ``DataFrame``?
+====================================================
+
+How do I select specific columns from a ``DataFrame``?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/03_subset_columns.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I’m interested in the age of the titanic passengers.
+
+.. ipython:: python
+
+ ages = titanic["Age"]
+ ages.head()
+
+To select a single column, use square brackets ``[]`` with the column
+name of the column of interest.
+
+.. raw:: html
+
+
+
+
+Each column in a ``DataFrame`` is a ``Series``. As a single column is
+selected, the returned object is a pandas ``Series``. We can verify this
+by checking the type of the output:
+
+.. ipython:: python
+
+ type(titanic["Age"])
+
+And have a look at the ``shape`` of the output:
+
+.. ipython:: python
+
+ titanic["Age"].shape
+
+``shape`` is an attribute (remember the :ref:`tutorial on reading and writing <10min_tut_02_read_write>`; do not use parentheses for attributes) of a
+pandas ``Series`` and ``DataFrame`` containing the number of rows and
+columns: *(nrows, ncolumns)*. A pandas ``Series`` is 1-dimensional and only
+the number of rows is returned.
+
+.. raw:: html
+
+
+
+
+I’m interested in the age and sex of the titanic passengers.
+
+.. ipython:: python
+
+ age_sex = titanic[["Age", "Sex"]]
+ age_sex.head()
+
+To select multiple columns, use a list of column names within the
+selection brackets ``[]``.
+
+.. raw:: html
+
+
+
+
+.. note::
+ The inner square brackets define a
+ :ref:`Python list ` with column names, whereas
+ the outer brackets are used to select the data from a pandas
+ ``DataFrame`` as seen in the previous example.
+
+The returned data type is a pandas DataFrame:
+
+.. ipython:: python
+
+ type(titanic[["Age", "Sex"]])
+
+.. ipython:: python
+
+ titanic[["Age", "Sex"]].shape
+
+The selection returned a ``DataFrame`` with 891 rows and 2 columns. Remember, a
+``DataFrame`` is 2-dimensional with both a row and column dimension.
+
+.. raw:: html
+
+
+ To user guide
+
+For basic information on indexing, see :ref:`indexing.basics`
+
+.. raw:: html
+
+
+
+How do I filter specific rows from a ``DataFrame``?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/03_subset_rows.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I’m interested in the passengers older than 35 years.
+
+.. ipython:: python
+
+ above_35 = titanic[titanic["Age"] > 35]
+ above_35.head()
+
+To select rows based on a conditional expression, use a condition inside
+the selection brackets ``[]``.
+
+.. raw:: html
+
+
+
+
+The condition inside the selection
+brackets ``titanic["Age"] > 35`` checks for which rows the ``Age``
+column has a value larger than 35:
+
+.. ipython:: python
+
+ titanic["Age"] > 35
+
+The output of the conditional expression (``>``, but also ``==``,
+``!=``, ``<``, ``<=``,… would work) is actually a pandas ``Series`` of
+boolean values (either ``True`` or ``False``) with the same number of
+rows as the original ``DataFrame``. Such a ``Series`` of boolean values
+can be used to filter the ``DataFrame`` by putting it in between the
+selection brackets ``[]``. Only rows for which the value is ``True``
+will be selected.
+
+We know from before that the original titanic ``DataFrame`` consists of
+891 rows. Let’s have a look at the number of rows which satisfy the
+condition by checking the ``shape`` attribute of the resulting
+``DataFrame`` ``above_35``:
+
+.. ipython:: python
+
+ above_35.shape
+
+.. raw:: html
+
+
+
+
+I’m interested in the titanic passengers from cabin class 2 and 3.
+
+.. ipython:: python
+
+ class_23 = titanic[titanic["Pclass"].isin([2, 3])]
+ class_23.head()
+
+Similar to the conditional expression, the ``isin`` conditional function
+returns a ``True`` for each row for which the values are in the provided
+list. To filter the rows based on such a function, use the conditional
+function inside the selection brackets ``[]``. In this case, the condition
+inside the selection brackets ``titanic["Pclass"].isin([2, 3])`` checks for
+which rows the ``Pclass`` column is either 2 or 3.
+
+.. raw:: html
+
+
+
+
+The above is equivalent to filtering by rows for which the class is
+either 2 or 3 and combining the two statements with an ``|`` (or)
+operator:
+
+.. ipython:: python
+
+ class_23 = titanic[(titanic["Pclass"] == 2) | (titanic["Pclass"] == 3)]
+ class_23.head()
+
+.. note::
+ When combining multiple conditional statements, each condition
+ must be surrounded by parentheses ``()``. Moreover, you cannot use
+ ``or``/``and`` but need to use the ``or`` operator ``|`` and the ``and``
+ operator ``&``.
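+
+As an extra illustration (not part of the original example), both conditions
+can be combined with the ``&`` operator to select the class 2 and 3
+passengers older than 35:
+
+.. ipython:: python
+
+ titanic[(titanic["Pclass"].isin([2, 3])) & (titanic["Age"] > 35)].head()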
+
+.. raw:: html
+
+
+ To user guide
+
+Conditional (boolean) indexing, see :ref:`indexing.boolean`. Specific information on ``isin``, see :ref:`indexing.basics.indexing_isin`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+I want to work with passenger data for which the age is known.
+
+.. ipython:: python
+
+ age_no_na = titanic[titanic["Age"].notna()]
+ age_no_na.head()
+
+The ``notna`` conditional function returns a ``True`` for each row for
+which the value is not a null value. As such, this can be combined with
+the selection brackets ``[]`` to filter the data table.
+
+.. raw:: html
+
+
+
+
+You might wonder what actually changed, as the first 5 lines are still
+the same values. One way to verify is to check if the shape has changed:
+
+.. ipython:: python
+
+ age_no_na.shape
+
+.. raw:: html
+
+
+ To user guide
+
+For more dedicated functions on missing values, see :ref:`missing-data`
+
+.. raw:: html
+
+
+
+How do I select specific rows and columns from a ``DataFrame``?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/03_subset_columns_rows.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I’m interested in the names of the passengers older than 35 years.
+
+.. ipython:: python
+
+ adult_names = titanic.loc[titanic["Age"] > 35, "Name"]
+ adult_names.head()
+
+In this case, a subset of both rows and columns is made in one go and
+just using selection brackets ``[]`` is not sufficient anymore. The
+``loc``/``iloc`` operators are required in front of the selection
+brackets ``[]``. When using ``loc``/``iloc``, the part before the comma
+is the rows you want, and the part after the comma is the columns you
+want to select.
+
+.. raw:: html
+
+
+
+
+When using the column names, row labels or a conditional expression, use
+the ``loc`` operator in front of the selection brackets ``[]``. For both
+the part before and after the comma, you can use a single label, a list
+of labels, a slice of labels, a conditional expression or a colon. Using
+a colon specifies you want to select all rows or columns.
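+
+For instance, a colon for the row part selects all rows while limiting the
+columns to a list of labels (a small sketch on the same table):
+
+.. ipython:: python
+
+ titanic.loc[:, ["Name", "Age"]].head()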
+
+.. raw:: html
+
+
+
+
+I’m interested in rows 10 till 25 and columns 3 to 5.
+
+.. ipython:: python
+
+ titanic.iloc[9:25, 2:5]
+
+Again, a subset of both rows and columns is made in one go and just
+using selection brackets ``[]`` is not sufficient anymore. When
+specifically interested in certain rows and/or columns based on their
+position in the table, use the ``iloc`` operator in front of the
+selection brackets ``[]``.
+
+.. raw:: html
+
+
+
+
+When selecting specific rows and/or columns with ``loc`` or ``iloc``,
+new values can be assigned to the selected data. For example, to assign
+the name ``anonymous`` to the first 3 elements of the third column:
+
+.. ipython:: python
+
+ titanic.iloc[0:3, 3] = "anonymous"
+ titanic.head()
+
+.. raw:: html
+
+
+ To user guide
+
+For more detailed description on selecting subsets of a data table, see :ref:`indexing.choice`
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- When selecting subsets of data, square brackets ``[]`` are used.
+- Inside these brackets, you can use a single column/row label, a list
+ of column/row labels, a slice of labels, a conditional expression or
+ a colon.
+- Select specific rows and/or columns using ``loc`` when using the row
+ and column names
+- Select specific rows and/or columns using ``iloc`` when using the
+ positions in the table
+- You can assign new values to a selection based on ``loc``/``iloc``.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+Further details about indexing are provided in :ref:`indexing`
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
new file mode 100644
index 0000000000000..243ce4735eef4
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -0,0 +1,244 @@
+.. _10min_tut_04_plotting:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Air quality data
+
+
+
+
+
+For this tutorial, air quality data about :math:`NO_2` is used, made
+available by `openaq `__ and using the
+`py-openaq `__ package.
+The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for
+the measurement stations *FR04014*, *BETR801* and *London Westminster*
+in respectively Paris, Antwerp and London.
+
+.. raw:: html
+
+
+
+.. ipython:: python
+
+ air_quality = pd.read_csv("data/air_quality_no2.csv",
+ index_col=0, parse_dates=True)
+ air_quality.head()
+
+.. note::
+ The ``index_col`` and ``parse_dates`` parameters of the ``read_csv`` function
+ are used to define the first (0th) column as the index of the resulting
+ ``DataFrame`` and to convert the dates in that column to datetime objects,
+ respectively.
+
+.. raw:: html
+
+
+
+
+
+How to create plots in pandas?
+------------------------------
+
+.. image:: ../../_static/schemas/04_plot_overview.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want a quick visual check of the data.
+
+.. ipython:: python
+
+ @savefig 04_airqual_quick.png
+ air_quality.plot()
+
+With a DataFrame, pandas creates by default one line plot for each of
+the columns with numeric data.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+
+I want to plot only the columns of the data table with the data from Paris.
+
+.. ipython:: python
+
+ @savefig 04_airqual_paris.png
+ air_quality["station_paris"].plot()
+
+To plot a specific column, use the selection method of the
+:ref:`subset data tutorial <10min_tut_03_subset>` in combination with the ``plot``
+method. Hence, the ``plot`` method works on both ``Series`` and
+``DataFrame``.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+
+I want to visually compare the :math:`NO_2` values measured in London versus Paris.
+
+.. ipython:: python
+
+ @savefig 04_airqual_scatter.png
+ air_quality.plot.scatter(x="station_london",
+ y="station_paris",
+ alpha=0.5)
+
+.. raw:: html
+
+
+
+
+Apart from the default ``line`` plot when using the ``plot`` function, a
+number of alternatives are available to plot data. Let’s use some
+standard Python to get an overview of the available plot methods:
+
+.. ipython:: python
+
+ [method_name for method_name in dir(air_quality.plot) if not method_name.startswith("_")]
+
+.. note::
+ In many development environments as well as in IPython and
+ Jupyter Notebook, use the TAB key to get an overview of the available
+ methods, for example ``air_quality.plot.`` + TAB.
+
+One of the options is ``box``, which refers to a
+`boxplot `__. The ``box``
+method is applicable on the air quality example data as well:
+
+.. ipython:: python
+
+ @savefig 04_airqual_boxplot.png
+ air_quality.plot.box()
+
+.. raw:: html
+
+
+ To user guide
+
+For an introduction to the other plot methods, see :ref:`visualization.other`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+I want each of the columns in a separate subplot.
+
+.. ipython:: python
+
+ @savefig 04_airqual_area_subplot.png
+ axs = air_quality.plot.area(figsize=(12, 4), subplots=True)
+
+Separate subplots for each of the data columns are supported by the ``subplots`` argument
+of the plot functions. The builtin options available in each of the pandas plot
+functions are worth exploring.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+Some more formatting options of the pandas plot functionalities are explained in :ref:`visualization.formatting`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+I want to further customize, extend or save the resulting plot.
+
+.. ipython:: python
+
+ fig, axs = plt.subplots(figsize=(12, 4));
+ air_quality.plot.area(ax=axs);
+ @savefig 04_airqual_customized.png
+ axs.set_ylabel("NO$_2$ concentration");
+ fig.savefig("no2_concentrations.png")
+
+.. raw:: html
+
+
+
+
+Each of the plot objects created by pandas is a
+`matplotlib `__ object. As Matplotlib provides
+plenty of options to customize plots, making the link between pandas and
+matplotlib explicit enables all the power of matplotlib to be used for
+the plot. This strategy is applied in the previous example:
+
+::
+
+ fig, axs = plt.subplots(figsize=(12, 4)) # Create an empty matplotlib Figure and Axes
+ air_quality.plot.area(ax=axs) # Use pandas to put the area plot on the prepared Figure/Axes
+ axs.set_ylabel("NO$_2$ concentration") # Do any matplotlib customization you like
+ fig.savefig("no2_concentrations.png") # Save the Figure/Axes using the existing matplotlib method.
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- The ``.plot`` methods are applicable on both Series and DataFrames
+- By default, each of the columns is plotted as a different element
+ (line, boxplot,…)
+- Any plot created by pandas is a matplotlib object.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+A full overview of plotting in pandas is provided in :ref:`visualization`.
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
new file mode 100644
index 0000000000000..1f3767d362b73
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
@@ -0,0 +1,186 @@
+.. _10min_tut_05_columns:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Air quality data
+
+
+
+
+
+For this tutorial, air quality data about :math:`NO_2` is used, made
+available by `openaq `__ and using the
+`py-openaq `__ package.
+The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for
+the measurement stations *FR04014*, *BETR801* and *London Westminster*
+in respectively Paris, Antwerp and London.
+
+.. raw:: html
+
+
+
+How to create new columns derived from existing columns?
+--------------------------------------------------------
+
+.. image:: ../../_static/schemas/05_newcolumn_1.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want to express the :math:`NO_2` concentration of the station in London in mg/m\ :math:`^3`
+
+(*If we assume temperature of 25 degrees Celsius and pressure of 1013
+hPa, the conversion factor is 1.882*)
+
+.. ipython:: python
+
+ air_quality["london_mg_per_cubic"] = air_quality["station_london"] * 1.882
+ air_quality.head()
+
+To create a new column, use the ``[]`` brackets with the new column name
+at the left side of the assignment.
+
+.. raw:: html
+
+
+
+
+.. note::
+ The calculation of the values is done **element-wise**. This
+ means all values in the given column are multiplied by the value 1.882
+ at once. You do not need to use a loop to iterate over each of the rows!
+
+.. image:: ../../_static/schemas/05_newcolumn_2.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want to check the ratio of the values in Paris versus Antwerp and save the result in a new column
+
+.. ipython:: python
+
+ air_quality["ratio_paris_antwerp"] = air_quality["station_paris"] / air_quality["station_antwerp"]
+ air_quality.head()
+
+The calculation is again element-wise, so the ``/`` is applied *for the
+values in each row*.
+
+.. raw:: html
+
+
+
+
+Other mathematical operators (+, -, \*, /) and
+logical operators (<, >, ==,…) also work element-wise. The latter was already
+used in the :ref:`subset data tutorial <10min_tut_03_subset>` to filter
+rows of a table using a conditional expression.
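+
+As a small extra illustration, an element-wise comparison between two of the
+measurement columns returns a boolean ``Series``:
+
+.. ipython:: python
+
+ (air_quality["station_paris"] > air_quality["station_london"]).head()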
+
+.. raw:: html
+
+
+
+
+I want to rename the data columns to the corresponding station identifiers used by openAQ
+
+.. ipython:: python
+
+ air_quality_renamed = air_quality.rename(
+     columns={"station_antwerp": "BETR801",
+              "station_paris": "FR04014",
+              "station_london": "London Westminster"})
+
+.. ipython:: python
+
+ air_quality_renamed.head()
+
+The ``rename`` function can be used for both row labels and column
+labels. Provide a dictionary with the current names as keys and the new
+names as values to update the corresponding names.
+
+.. raw:: html
+
+
+
+
+The mapping should not be restricted to fixed names only, but can be a
+mapping function as well. For example, converting the column names to
+lowercase letters can also be done using a function:
+
+.. ipython:: python
+
+ air_quality_renamed = air_quality_renamed.rename(columns=str.lower)
+ air_quality_renamed.head()
+
+.. raw:: html
+
+
+ To user guide
+
+Further details about column or row label renaming are provided in :ref:`basics.rename`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- Create a new column by assigning the output to the DataFrame with a
+ new column name in between the ``[]``.
+- Operations are element-wise, no need to loop over rows.
+- Use ``rename`` with a dictionary or function to rename row labels or
+ column names.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+Further details about column addition and deletion is provided in :ref:``TODO``
+(`label `__
+to add in sphinx)
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
new file mode 100644
index 0000000000000..f9d7bc34a69bb
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
@@ -0,0 +1,310 @@
+.. _10min_tut_06_stats:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Titanic data
+
+
+
+
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has value 0 or 1: 0 for not survived and 1
+ for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Indication whether the passenger has siblings and/or a spouse aboard.
+- Parch: Whether a passenger is alone or has family aboard.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+
+
+How to calculate summary statistics?
+------------------------------------
+
+Aggregating statistics
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/06_aggregate.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+What is the average age of the titanic passengers?
+
+.. ipython:: python
+
+ titanic["Age"].mean()
+
+.. raw:: html
+
+
+
+
+Different statistics are available and can be applied to columns with
+numerical data. Operations in general exclude missing data and operate
+across rows by default.
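+
+For instance, the handling of missing data can be made explicit with the
+``skipna`` argument (a quick sketch; with ``skipna=False`` the missing ages
+propagate into the result):
+
+.. ipython:: python
+
+ titanic["Age"].mean(skipna=False)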
+
+.. image:: ../../_static/schemas/06_reduction.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+What is the median age and ticket fare price of the titanic passengers?
+
+.. ipython:: python
+
+ titanic[["Age", "Fare"]].median()
+
+The statistic applied to multiple columns of a ``DataFrame`` (the selection of two columns
+returns a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>`) is calculated for each numeric column.
+
+.. raw:: html
+
+
+
+
+The aggregating statistic can be calculated for multiple columns at the
+same time. Remember the ``describe`` function from the :ref:`first tutorial <10min_tut_01_tableoriented>`?
+
+.. ipython:: python
+
+ titanic[["Age", "Fare"]].describe()
+
+Instead of the predefined statistics, specific combinations of
+aggregating statistics for given columns can be defined using the
+``agg`` method:
+
+.. ipython:: python
+
+ titanic.agg({'Age': ['min', 'max', 'median', 'skew'],
+              'Fare': ['min', 'max', 'median', 'mean']})
+
+.. raw:: html
+
+
+ To user guide
+
+Further details about descriptive statistics are provided in :ref:`basics.stats`.
+
+.. raw:: html
+
+
+
+
+Aggregating statistics grouped by category
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/06_groupby.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+What is the average age for male versus female titanic passengers?
+
+.. ipython:: python
+
+ titanic[["Sex", "Age"]].groupby("Sex").mean()
+
+As our interest is the average age for each gender, a subselection on
+these two columns is made first: ``titanic[["Sex", "Age"]]``. Next, the
+``groupby`` method is applied on the ``Sex`` column to make a group per
+category. The average age *for each gender* is calculated and
+returned.
+
+.. raw:: html
+
+
+
+
+Calculating a given statistic (e.g. ``mean`` age) *for each category in
+a column* (e.g. male/female in the ``Sex`` column) is a common pattern.
+The ``groupby`` method is used to support this type of operation. More
+generally, this fits in the ``split-apply-combine`` pattern:
+
+- **Split** the data into groups
+- **Apply** a function to each group independently
+- **Combine** the results into a data structure
+
+The apply and combine steps are typically done together in pandas.
+
+In the previous example, we explicitly selected the 2 columns first. If
+not, the ``mean`` method is applied to each column containing numerical
+data:
+
+.. ipython:: python
+
+ titanic.groupby("Sex").mean()
+
+It does not make much sense to get the average value of the ``Pclass``.
+If we are only interested in the average age for each gender, the
+selection of columns (square brackets ``[]`` as usual) is supported
+on the grouped data as well:
+
+.. ipython:: python
+
+ titanic.groupby("Sex")["Age"].mean()
+
+.. image:: ../../_static/schemas/06_groupby_select_detail.svg
+ :align: center
+
+.. note::
+ The `Pclass` column contains numerical data but actually
+ represents 3 categories (or factors) with respectively the labels ‘1’,
+ ‘2’ and ‘3’. Calculating statistics on these does not make much sense.
+ Therefore, pandas provides a ``Categorical`` data type to handle this
+ type of data. More information is provided in the user guide
+ :ref:`categorical` section.
+
+.. raw:: html
+
+
+
+
+What is the mean ticket fare price for each of the sex and cabin class combinations?
+
+.. ipython:: python
+
+ titanic.groupby(["Sex", "Pclass"])["Fare"].mean()
+
+Grouping can be done by multiple columns at the same time. Provide the
+column names as a list to the ``groupby`` method.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on groupby and the split-apply-combine approach is provided in :ref:`groupby`.
+
+.. raw:: html
+
+
+
+Count number of records by category
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/06_valuecounts.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+What is the number of passengers in each of the cabin classes?
+
+.. ipython:: python
+
+ titanic["Pclass"].value_counts()
+
+The ``value_counts`` function counts the number of records for each
+category in a column.
+
+.. raw:: html
+
+
+
+
+The function is a shortcut, as it is actually a groupby operation in combination with counting of the number of records
+within each group:
+
+.. ipython:: python
+
+ titanic.groupby("Pclass")["Pclass"].count()
+
+.. note::
+ Both ``size`` and ``count`` can be used in combination with
+ ``groupby``. Whereas ``size`` includes ``NaN`` values and just provides
+ the number of rows (size of the table), ``count`` excludes the missing
+ values. In the ``value_counts`` method, use the ``dropna`` argument to
+ include or exclude the ``NaN`` values.
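+
+A small sketch of that difference, using the ``Age`` column which contains
+missing values:
+
+.. ipython:: python
+
+ titanic.groupby("Pclass")["Age"].size()
+ titanic.groupby("Pclass")["Age"].count()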
+
+.. raw:: html
+
+
+ To user guide
+
+For more information about ``value_counts``, see :ref:`basics.discretization`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- Aggregation statistics can be calculated on entire columns or rows
+- ``groupby`` provides the power of the *split-apply-combine* pattern
+- ``value_counts`` is a convenient shortcut to count the number of
+ entries in each category of a variable
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on groupby and the split-apply-combine approach is provided in :ref:`groupby`.
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
new file mode 100644
index 0000000000000..934811d5fae1b
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -0,0 +1,402 @@
+.. _10min_tut_07_reshape:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Titanic data
+
+
+
+
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has value 0 or 1: 0 for not survived and 1
+ for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Indication whether the passenger has siblings and/or a spouse aboard.
+- Parch: Whether a passenger is alone or has family aboard.
+- Ticket: Ticket number of passenger.
+- Fare: Indicating the fare.
+- Cabin: The cabin of passenger.
+- Embarked: The embarked category.
+
+.. raw:: html
+
+
+
+This tutorial uses air quality data about :math:`NO_2` and Particulate matter less than 2.5
+micrometers, made available by
+`openaq `__ and using the
+`py-openaq `__ package.
+The ``air_quality_long.csv`` data set provides :math:`NO_2` and
+:math:`pm25` values for the measurement stations *FR04014*, *BETR801*
+and *London Westminster* in respectively Paris, Antwerp and London.
+
+The air-quality data set has the following columns:
+
+- city: city where the sensor is used, either Paris, Antwerp or London
+- country: country where the sensor is used, either FR, BE or GB
+- location: the id of the sensor, either *FR04014*, *BETR801* or
+ *London Westminster*
+- parameter: the parameter measured by the sensor, either :math:`NO_2`
+ or Particulate matter
+- value: the measured value
+- unit: the unit of the measured parameter, in this case ‘µg/m³’
+
+and the index of the ``DataFrame`` is ``datetime``, the datetime of the
+measurement.
+
+.. note::
+ The air-quality data is provided in a so-called *long format*
+ data representation with each observation on a separate row and each
+ variable a separate column of the data table. The long/narrow format is
+ also known as the `tidy data
+ format `__.
+
+.. raw:: html
+
+
+
+How to reshape the layout of tables?
+------------------------------------
+
+Sort table rows
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+
+
+
+I want to sort the titanic data according to the age of the passengers.
+
+.. ipython:: python
+
+ titanic.sort_values(by="Age").head()
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+
+I want to sort the titanic data according to the cabin class and age in descending order.
+
+.. ipython:: python
+
+ titanic.sort_values(by=['Pclass', 'Age'], ascending=False).head()
+
+With ``sort_values``, the rows in the table are sorted according to the
+defined column(s). The index will follow the row order.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More details about sorting of tables are provided in :ref:`basics.sorting`.
+
+.. raw:: html
+
+
+
+Long to wide table format
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Let’s use a small subset of the air quality data set. We focus on
+:math:`NO_2` data and only use the first two measurements of each
+location (i.e. the head of each group). The subset of data will be
+called ``no2_subset``
+
+.. ipython:: python
+
+ no2 = air_quality[air_quality["parameter"] == "no2"] # filter for no2 data only
+
+.. ipython:: python
+
+ no2_subset = no2.sort_index().groupby(["location"]).head(2) # use 2 measurements (head) for each location (groupby)
+ no2_subset
+
+.. image:: ../../_static/schemas/07_pivot.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want the values for the three stations as separate columns next to each other
+
+.. ipython:: python
+
+ no2_subset.pivot(columns="location", values="value")
+
+The ``pivot`` function purely reshapes the data: a single value
+for each index/column combination is required.
+
+.. raw:: html
+
+
+
+
+As pandas supports plotting of multiple columns (see the :ref:`plotting tutorial <10min_tut_04_plotting>`) out of the box, the conversion from
+*long* to *wide* table format enables the plotting of the different time
+series at the same time:
+
+.. ipython:: python
+
+ no2.head()
+
+.. ipython:: python
+
+ @savefig 7_reshape_columns.png
+ no2.pivot(columns="location", values="value").plot()
+
+.. note::
+ When the ``index`` parameter is not defined, the existing
+ index (row labels) is used.
+
+.. raw:: html
+
+
+ To user guide
+
+For more information about ``pivot``, see :ref:`reshaping.reshaping`.
+
+.. raw:: html
+
+
+
+Pivot table
+~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/07_pivot_table.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of the stations in table form
+
+.. ipython:: python
+
+ air_quality.pivot_table(values="value", index="location",
+ columns="parameter", aggfunc="mean")
+
+In the case of ``pivot``, the data is only rearranged. When multiple
+values need to be aggregated (in this specific case, the values on
+different time steps) ``pivot_table`` can be used, providing an
+aggregation function (e.g. mean) on how to combine these values.
+
+.. raw:: html
+
+
+
+
+A pivot table is a well-known concept in spreadsheet software. When
+interested in summary columns for each variable separately as well, set
+the ``margins`` parameter to ``True``:
+
+.. ipython:: python
+
+ air_quality.pivot_table(values="value", index="location",
+ columns="parameter", aggfunc="mean",
+ margins=True)
+
+.. raw:: html
+
+
+ To user guide
+
+For more information about ``pivot_table``, see :ref:`reshaping.pivot`.
+
+
+.. raw:: html
+
+
+
+.. note::
+ In case you are wondering, ``pivot_table`` is indeed directly linked
+ to ``groupby``. The same result can be derived by grouping on both
+ ``parameter`` and ``location``:
+
+ ::
+
+ air_quality.groupby(["parameter", "location"]).mean()
+
+.. raw:: html
+
+
+ To user guide
+
+Have a look at ``groupby`` in combination with ``unstack`` at
+`:ref:``TODO LABEL`` `__
+
+.. raw:: html
+
+
+
+Wide to long format
+~~~~~~~~~~~~~~~~~~~
+
+Starting again from the wide format table created in the previous
+section:
+
+.. ipython:: python
+
+ no2_pivoted = no2.pivot(columns="location", values="value").reset_index()
+ no2_pivoted.head()
+
+.. image:: ../../_static/schemas/07_melt.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want to collect all air quality :math:`NO_2` measurements in a single column (long format)
+
+.. ipython:: python
+
+ no_2 = no2_pivoted.melt(id_vars="date.utc")
+ no_2.head()
+
+The ``melt`` method on a ``DataFrame`` converts the data table from wide
+format to long format. The column headers become the variable names in a
+newly created column.
+
+.. raw:: html
+
+
+
+
+The solution is the short version on how to apply ``melt``. The method
+will *melt* all columns NOT mentioned in ``id_vars`` together into two
+columns: a column with the column header names and a column with the
+values themselves. The latter column gets the name ``value`` by default.
+
+The ``melt`` method can be defined in more detail:
+
+.. ipython:: python
+
+ no_2 = no2_pivoted.melt(id_vars="date.utc",
+ value_vars=["BETR801", "FR04014", "London Westminster"],
+ value_name="NO_2",
+ var_name="id_location")
+ no_2.head()
+
+The result is the same, but defined in more detail:
+
+- ``value_vars`` defines explicitly which columns to *melt* together
+- ``value_name`` provides a custom column name for the values column
+ instead of the default column name ``value``
+- ``var_name`` provides a custom column name for the column collecting
+ the column header names. Otherwise it takes the index name or a
+ default ``variable``
+
+Hence, the arguments ``value_name`` and ``var_name`` are just
+user-defined names for the two generated columns. The columns to melt
+are defined by ``id_vars`` and ``value_vars``.
+
+.. raw:: html
+
+
+ To user guide
+
+Conversion from wide to long format with ``melt`` is explained in :ref:`reshaping.melt`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- Sorting by one or more columns is supported by ``sort_values``
+- The ``pivot`` function is purely restructuring of the data,
+ ``pivot_table`` supports aggregations
+- The reverse of ``pivot`` (long to wide format) is ``melt`` (wide to
+ long format)
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on reshaping and pivoting is provided in :ref:`reshaping`.
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
new file mode 100644
index 0000000000000..42e3abe83d75c
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
@@ -0,0 +1,323 @@
+.. _10min_tut_08_combine:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Air quality Nitrate data
+
+
+
+
+
+For this tutorial, air quality data about :math:`NO_2` is used, made available by
+`openaq `__ and downloaded using the
+`py-openaq `__ package.
+
+The ``air_quality_no2_long.csv`` data set provides :math:`NO_2`
+values for the measurement stations *FR04014*, *BETR801* and *London
+Westminster* in respectively Paris, Antwerp and London.
+
+.. raw:: html
+
+
+
+.. ipython:: python
+
+ air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv",
+ parse_dates=True)
+ air_quality_no2 = air_quality_no2[["date.utc", "location", "parameter", "value"]] # focus on a subset of columns
+ air_quality_no2.head()
+
+.. raw:: html
+
+
+
+
+ Air quality Particulate matter data
+
+
+
+
+
+For this tutorial, air quality data about Particulate
+matter less than 2.5 micrometers is used, made available by
+`openaq `__ and downloaded using the
+`py-openaq `__ package.
+
+The ``air_quality_pm25_long.csv`` data set provides :math:`pm25`
+values for the measurement stations *FR04014*, *BETR801* and *London
+Westminster* in respectively Paris, Antwerp and London.
+
+.. raw:: html
+
+
+
+.. ipython:: python
+
+ air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv",
+ parse_dates=True)
+ air_quality_pm25 = air_quality_pm25[["date.utc", "location", "parameter", "value"]] # focus on a subset of columns
+ air_quality_pm25.head()
+
+.. raw:: html
+
+
+
+
+
+
+How to combine data from multiple tables?
+-----------------------------------------
+
+Concatenating objects
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/08_concat_row.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+I want to combine the measurements of :math:`NO_2` and :math:`pm_{25}`, two tables with a similar structure, in a single table
+
+.. ipython:: python
+
+ air_quality = pd.concat([air_quality_pm25, air_quality_no2], axis=0)
+ air_quality.head()
+
+The ``concat`` function performs concatenation operations of multiple
+tables along one of the axes (row-wise or column-wise).
+
+.. raw:: html
+
+
+
+
+By default concatenation is along axis 0, so the resulting table combines the rows
+of the input tables. Let’s check the shape of the original and the
+concatenated tables to verify the operation:
+
+.. ipython:: python
+
+ print('Shape of the `air_quality_pm25` table: ', air_quality_pm25.shape)
+ print('Shape of the `air_quality_no2` table: ', air_quality_no2.shape)
+ print('Shape of the resulting `air_quality` table: ', air_quality.shape)
+
+Hence, the resulting table has 3178 = 1110 + 2068 rows.
+
+.. note::
+ The **axis** argument occurs in a number of pandas
+ methods that can be applied **along an axis**. A ``DataFrame`` has two
+ corresponding axes: the first running vertically downwards across rows
+ (axis 0), and the second running horizontally across columns (axis 1).
+ Most operations like concatenation or summary statistics are by default
+ across rows (axis 0), but can be applied across columns as well.
+
+Sorting the table on the datetime information also illustrates the
+combination of both tables, with the ``parameter`` column defining the
+origin of the table (either ``no2`` from table ``air_quality_no2`` or
+``pm25`` from table ``air_quality_pm25``):
+
+.. ipython:: python
+
+ air_quality = air_quality.sort_values("date.utc")
+ air_quality.head()
+
+In this specific example, the ``parameter`` column provided by the data
+ensures that each of the original tables can be identified. This is not
+always the case. The ``concat`` function provides a convenient solution
+with the ``keys`` argument, adding an additional (hierarchical) row
+index. For example:
+
+.. ipython:: python
+
+ air_quality_ = pd.concat([air_quality_pm25, air_quality_no2], keys=["PM25", "NO2"])
+
+.. ipython:: python
+
+ air_quality_.head()
+
+.. note::
+ The existence of multiple row/column indices at the same time
+ has not been mentioned within these tutorials. *Hierarchical indexing*
+ or *MultiIndex* is an advanced and powerful pandas feature to analyze
+ higher dimensional data.
+
+ Multi-indexing is out of scope for this pandas introduction. For the
+ moment, remember that the function ``reset_index`` can be used to
+ convert any level of an index to a column, e.g.
+ ``air_quality.reset_index(level=0)``
+
+ .. raw:: html
+
+
+ To user guide
+
+ Feel free to dive into the world of multi-indexing at :ref:`advanced`.
+
+ .. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More options on table concatenation (row and column
+wise) and how ``concat`` can be used to define the logic (union or
+intersection) of the indexes on the other axes is provided at
+:ref:`merging.concat`.
+
+.. raw:: html
+
+
+
+Join tables using a common identifier
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: ../../_static/schemas/08_merge_left.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+Add the station coordinates, provided by the stations metadata table, to the corresponding rows in the measurements table.
+
+.. warning::
+ The air quality measurement station coordinates are stored in a data
+ file ``air_quality_stations.csv``, downloaded using the
+ `py-openaq `__ package.
+
+.. ipython:: python
+
+ stations_coord = pd.read_csv("data/air_quality_stations.csv")
+ stations_coord.head()
+
+.. note::
+ The stations used in this example (FR04014, BETR801 and London
+ Westminster) are just three entries listed in the metadata table. We
+ only want to add the coordinates of these three to the measurements
+ table, each on the corresponding rows of the ``air_quality`` table.
+
+.. ipython:: python
+
+ air_quality.head()
+
+.. ipython:: python
+
+ air_quality = pd.merge(air_quality, stations_coord,
+ how='left', on='location')
+ air_quality.head()
+
+Using the ``merge`` function, for each of the rows in the
+``air_quality`` table, the corresponding coordinates are added from the
+``air_quality_stations_coord`` table. Both tables have the column
+``location`` in common which is used as a key to combine the
+information. By choosing the ``left`` join, only the locations available
+in the ``air_quality`` (left) table, i.e. FR04014, BETR801 and London
+Westminster, end up in the resulting table. The ``merge`` function
+supports multiple join options similar to database-style operations.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+
+Add the parameter full description and name, provided by the parameters metadata table, to the measurements table
+
+.. warning::
+ The air quality parameters metadata are stored in a data file
+ ``air_quality_parameters.csv``, downloaded using the
+ `py-openaq `__ package.
+
+.. ipython:: python
+
+ air_quality_parameters = pd.read_csv("data/air_quality_parameters.csv")
+ air_quality_parameters.head()
+
+.. ipython:: python
+
+ air_quality = pd.merge(air_quality, air_quality_parameters,
+ how='left', left_on='parameter', right_on='id')
+ air_quality.head()
+
+Compared to the previous example, there is no common column name.
+However, the ``parameter`` column in the ``air_quality`` table and the
+``id`` column in the ``air_quality_parameters`` table both provide the
+measured variable in a common format. The ``left_on`` and ``right_on``
+arguments are used here (instead of just ``on``) to make the link
+between the two tables.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+Pandas also supports inner, outer, and right joins.
+More information on join/merge of tables is provided in
+:ref:`merging.join`. Or have a look at the
+:ref:`comparison with SQL`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+REMEMBER
+
+- Multiple tables can be concatenated both column-wise and row-wise using
+ the ``concat`` function.
+- For database-like merging/joining of tables, use the ``merge``
+ function.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+The user guide provides more information on combining together data tables, see :ref:`merging`.
+
+.. raw:: html
+
+
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
new file mode 100644
index 0000000000000..087369e6ce069
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -0,0 +1,382 @@
+.. _10min_tut_09_timeseries:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Air quality data
+
+
+
+
+
+For this tutorial, air quality data about :math:`NO_2` and Particulate
+matter less than 2.5 micrometers is used, made available by
+`openaq `__ and downloaded using the
+`py-openaq `__ package.
+The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` values
+for the measurement stations *FR04014*, *BETR801* and *London
+Westminster* in respectively Paris, Antwerp and London.
+
+.. raw:: html
+
+
+
+How to handle time series data with ease?
+-----------------------------------------
+
+Using pandas datetime properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+
+
+
+I want to work with the dates in the column ‘datetime’ as datetime objects instead of plain text
+
+.. ipython:: python
+
+ air_quality["datetime"] = pd.to_datetime(air_quality["datetime"])
+ air_quality["datetime"]
+
+Initially, the values in ``datetime`` are character strings and do not
+provide any datetime operations (e.g. extract the year, day of the
+week,…). By applying the ``to_datetime`` function, pandas interprets the
+strings and converts these to datetime (i.e. ``datetime64[ns, UTC]``)
+objects. In pandas we call these datetime objects, similar to
+``datetime.datetime`` from the standard library, ``Timestamp`` objects.
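+
+As a small illustration (a sketch, separate from the data set handling above),
+``to_datetime`` also parses individual date strings:
+
+::
+
+ pd.to_datetime("2019-06-21 08:00")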
+
+.. raw:: html
+
+
+
+
+.. note::
+ As many data sets do contain datetime information in one of
+ the columns, pandas input functions like ``read_csv`` and ``read_json``
+ can do the transformation to dates when reading the data, using the
+ ``parse_dates`` parameter with a list of the columns to read as
+ ``Timestamp``:
+
+ ::
+
+ pd.read_csv("../data/air_quality_no2_long.csv", parse_dates=["datetime"])
+
+Why are these ``Timestamp`` objects useful? Let’s illustrate the added
+value with some example cases.
+
+ What is the start and end date of the time series data set we are
+ working with?
+
+.. ipython:: python
+
+ air_quality["datetime"].min(), air_quality["datetime"].max()
+
+Using ``Timestamp`` for datetimes enables us to calculate with date
+information and makes them comparable. Hence, we can use this to get the
+length of our time series:
+
+.. ipython:: python
+
+ air_quality["datetime"].max() - air_quality["datetime"].min()
+
+The result is a ``Timedelta`` object, similar to ``datetime.timedelta``
+from the standard Python library and defining a time duration.
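+
+Such ``Timedelta`` objects can also be used in calculations; for example (as a
+sketch), shifting all timestamps by one day:
+
+::
+
+ air_quality["datetime"] + pd.Timedelta("1 day")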
+
+.. raw:: html
+
+
+ To user guide
+
+The main time related concepts of pandas are explained in :ref:`timeseries.overview`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+I want to add a new column to the dataframe containing only the month of the measurement
+
+.. ipython:: python
+
+ air_quality["month"] = air_quality["datetime"].dt.month
+ air_quality.head()
+
+By using ``Timestamp`` objects for dates, a lot of time-related
+properties are provided by pandas. For example the ``month``, but also
+``year``, ``weekofyear``, ``quarter``,… All of these properties are
+accessible by the ``dt`` accessor.
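+
+For example (as a sketch), the year or the day of the week are available in the
+same way:
+
+::
+
+ air_quality["datetime"].dt.year
+ air_quality["datetime"].dt.weekday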
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+An overview of the existing properties is given in
+:ref:`timeseries.components`. More details about the ``dt`` accessor
+to return datetime like properties for the values of the Series are
+provided in :ref:`basics.dt_accessors`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+What is the average :math:`NO_2` concentration for each day of the week for each of the measurement locations?
+
+.. ipython:: python
+
+ air_quality.groupby([air_quality["datetime"].dt.weekday, "location"])["value"].mean()
+
+Remember the split-apply-combine pattern provided by ``groupby`` from the
+:ref:`tutorial on statistics calculation <10min_tut_06_stats>`?
+Here, we want to calculate a given statistic (e.g. mean :math:`NO_2`)
+**for each weekday** and **for each measurement location**. To group on
+weekdays, we use the datetime property ``weekday`` (with Monday=0 and
+Sunday=6) of pandas ``Timestamp``, which is also accessible by the
+``dt`` accessor. Grouping on both locations and weekdays splits the
+calculation of the mean over each of these combinations.
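+
+As a sketch of a possible next step, the same result can be reshaped into one
+column per location with ``unstack``:
+
+::
+
+ air_quality.groupby([air_quality["datetime"].dt.weekday, "location"])["value"].mean().unstack("location")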
+
+.. danger::
+ As we are working with a very short time series in these
+ examples, the analysis does not provide a long-term representative
+ result!
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+
+Plot the typical :math:`NO_2` pattern during the day of our time series of all stations together. In other words, what is the average value for each hour of the day?
+
+.. ipython:: python
+
+ fig, axs = plt.subplots(figsize=(12, 4))
+ air_quality.groupby(air_quality["datetime"].dt.hour)["value"].mean().plot(kind='bar', rot=0, ax=axs)
+ plt.xlabel("Hour of the day"); # custom label for the x-axis using matplotlib
+ @savefig 09_bar_chart.png
+ plt.ylabel(r"$NO_2\ (µg/m^3)$");
+
+Similar to the previous case, we want to calculate a given statistic
+(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
+split-apply-combine approach again.
+
+.. raw:: html
+
+
+
+
+Datetime as index
+~~~~~~~~~~~~~~~~~
+
+In the :ref:`tutorial on reshaping <10min_tut_07_reshape>`,
+``pivot`` was introduced to reshape the data table with each of the
+measurements locations as a separate column:
+
+.. ipython:: python
+
+ no_2 = air_quality.pivot(index="datetime", columns="location", values="value")
+ no_2.head()
+
+.. note::
+ By pivoting the data, the datetime information became the
+ index of the table. In general, setting a column as an index can be
+ achieved by the ``set_index`` function.
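+
+ A sketch of that alternative route (only illustrating ``set_index``, not
+ needed for the pivoted ``no_2`` table above):
+
+ ::
+
+ air_quality.set_index("datetime")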
+
+Working with a datetime index (i.e. ``DatetimeIndex``) provides powerful
+functionalities. For example, we do not need the ``dt`` accessor to get
+the time series properties, but have these properties available on the
+index directly:
+
+.. ipython:: python
+
+ no_2.index.year, no_2.index.weekday
+
+Some other advantages are the convenient subsetting of time periods and
+the adapted time scale on plots. Let’s apply this to our data.
+
+.. raw:: html
+
+
+
+
+Create a plot of the :math:`NO_2` values in the different stations from the 20th of May till the end of the 21st of May
+
+.. ipython:: python
+
+ @savefig 09_time_section.png
+ no_2["2019-05-20" : "2019-05-21"].plot();
+
+By providing a **string that parses to a datetime**, a specific subset of the data can be selected on a ``DatetimeIndex``.
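+
+Partial strings work as well; as a sketch, selecting the whole month of May
+2019 at once:
+
+::
+
+ no_2.loc["2019-05"]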
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on the ``DatetimeIndex`` and the slicing by using strings is provided in :ref:`timeseries.datetimeindex`.
+
+.. raw:: html
+
+
+
+Resample a time series to another frequency
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+
+
+
+Aggregate the current hourly time series values to the monthly maximum value in each of the stations.
+
+.. ipython:: python
+
+ monthly_max = no_2.resample("M").max()
+ monthly_max
+
+A very powerful method on time series data with a datetime index is the
+ability to ``resample`` time series to another frequency (e.g.,
+converting secondly data into 5-minutely data).
+
+.. raw:: html
+
+
+
+
+The ``resample`` method is similar to a groupby operation:
+
+- it provides a time-based grouping, by using a string (e.g. ``M``,
+ ``5H``,…) that defines the target frequency
+- it requires an aggregation function such as ``mean``, ``max``,…
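+
+As a sketch of both elements combined, a different frequency and aggregation
+could be, for example, 6-hourly averages:
+
+::
+
+ no_2.resample("6H").mean()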
+
+.. raw:: html
+
+
+ To user guide
+
+An overview of the aliases used to define time series frequencies (also called *offset aliases*) is given in :ref:`timeseries.offset_aliases`.
+
+.. raw:: html
+
+
+
+When defined, the frequency of the time series is provided by the
+``freq`` attribute:
+
+.. ipython:: python
+
+ monthly_max.index.freq
+
+.. raw:: html
+
+
+
+
+Make a plot of the daily mean :math:`NO_2` value in each of the stations.
+
+.. ipython:: python
+
+ no_2.resample("D").mean().plot(style="-o", figsize=(10, 5))
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More details on the power of time series ``resampling`` is provided in :ref:`timeseries.resampling`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
REMEMBER
+
+- Valid date strings can be converted to datetime objects using the
+ ``to_datetime`` function or as part of read functions.
+- Datetime objects in pandas support calculations, logical operations
+ and convenient date-related properties using the ``dt`` accessor.
+- A ``DatetimeIndex`` contains these date-related properties and
+ supports convenient slicing.
+- ``Resample`` is a powerful method to change the frequency of a time
+ series.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on time series and date functionalities is given in :ref:`timeseries`.
+
+.. raw:: html
+
+
\ No newline at end of file
diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst
new file mode 100644
index 0000000000000..dd5fc5dcbb08e
--- /dev/null
+++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst
@@ -0,0 +1,278 @@
+.. _10min_tut_10_text:
+
+{{ header }}
+
+.. ipython:: python
+
+ import pandas as pd
+
+.. raw:: html
+
+
+
+
+ Data used for this tutorial:
+
+
+
+
+
+ Titanic data
+
+
+
+
+
+This tutorial uses the titanic data set, stored as CSV. The data
+consists of the following data columns:
+
+- PassengerId: Id of every passenger.
+- Survived: This feature has value 0 or 1: 0 for not survived and 1
+ for survived.
+- Pclass: There are 3 classes: Class 1, Class 2 and Class 3.
+- Name: Name of the passenger.
+- Sex: Gender of the passenger.
+- Age: Age of the passenger.
+- SibSp: Indication that the passenger has siblings and a spouse.
+- Parch: Whether a passenger is alone or has family.
+- Ticket: Ticket number of the passenger.
+- Fare: The fare of the ticket.
+- Cabin: The cabin of the passenger.
+- Embarked: The port of embarkation.
+
+.. raw:: html
+
+
+
+How to manipulate data columns with textual data?
+-------------------------------------------------
+
+.. raw:: html
+
+
+
+
+Make all name characters lowercase
+
+.. ipython:: python
+
+ titanic["Name"].str.lower()
+
+To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
+(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
+apply the ``lower`` method. As such, each of the strings is converted element wise.
+
+.. raw:: html
+
+
+
+
+Similar to datetime objects in the :ref:`time series tutorial <10min_tut_09_timeseries>`
+having a ``dt`` accessor, a number of
+specialized string methods are available when using the ``str``
+accessor. These methods have in general matching names with the
+equivalent built-in string methods for single elements, but are applied
+element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
+on each of the values of the columns.
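+
+For example (a sketch with another built-in equivalent), checking which names
+start with a given letter works element-wise too:
+
+::
+
+ titanic["Name"].str.startswith("B")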
+
+.. raw:: html
+
+
+
+
+Create a new column ``Surname`` that contains the surname of the Passengers by extracting the part before the comma.
+
+.. ipython:: python
+
+ titanic["Name"].str.split(",")
+
+Using the ``split`` method, each of the values is returned as a list of
+2 elements. The first element is the part before the comma and the
+second element is the part after the comma.
+
+.. ipython:: python
+
+ titanic["Surname"] = titanic["Name"].str.split(",").str.get(0)
+ titanic["Surname"]
+
+As we are only interested in the first part representing the surname
+(element 0), we can again use the ``str`` accessor and apply ``get`` to
+extract the relevant part. Indeed, these string functions can be
+concatenated to combine multiple functions at once!
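+
+A related sketch: the ``expand`` argument of ``split`` would return the parts
+as separate columns in one step instead:
+
+::
+
+ titanic["Name"].str.split(",", expand=True)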
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on extracting parts of strings is available in :ref:`text.split`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+Extract the passenger data about the Countess on board the Titanic.
+
+.. ipython:: python
+
+ titanic["Name"].str.contains("Countess")
+
+.. ipython:: python
+
+ titanic[titanic["Name"].str.contains("Countess")]
+
+(*Interested in her story? See*\ `Wikipedia `__\ *!*)
+
+The string method ``contains`` checks for each of the values in the
+column ``Name`` if the string contains the word ``Countess`` and returns
+for each of the values ``True`` (``Countess`` is part of the name) or
+``False`` (``Countess`` is not part of the name). This output can be used
+to subselect the data using conditional (boolean) indexing introduced in
+the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
+only 1 Countess on the Titanic, we get one row as a result.
+
+.. raw:: html
+
+
+
+
+.. note::
+ More powerful extractions on strings are supported, as the
+ ``contains`` and ``extract`` methods accept `regular
+ expressions `__, but this is out of
+ scope of this tutorial.
+
+.. raw:: html
+
+
+ To user guide
+
+More information on extracting parts of strings is available in :ref:`text.extract`.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+
+
+Which passenger of the titanic has the longest name?
+
+.. ipython:: python
+
+ titanic["Name"].str.len()
+
+To get the longest name we first have to get the lengths of each of the
+names in the ``Name`` column. By using pandas string methods, the
+``len`` function is applied to each of the names individually
+(element-wise).
+
+.. ipython:: python
+
+ titanic["Name"].str.len().idxmax()
+
+Next, we need to get the corresponding location, preferably the index
+label, in the table for which the name length is the largest. The
+``idxmax`` method does exactly that. It is not a string method and is
+applied to integers, so no ``str`` is used.
+
+.. ipython:: python
+
+ titanic.loc[titanic["Name"].str.len().idxmax(), "Name"]
+
+Based on the index name of the row (``307``) and the column (``Name``),
+we can do a selection using the ``loc`` operator, introduced in the
+:ref:`tutorial on subsetting <10min_tut_03_subset>`.
+
+.. raw:: html
+
+
+
+
+.. raw:: html
+
+
+
+
+In the ‘Sex’ column, replace values of ‘male’ by ‘M’ and all ‘female’ values by ‘F’
+
+.. ipython:: python
+
+ titanic["Sex_short"] = titanic["Sex"].replace({"male": "M",
+ "female": "F"})
+ titanic["Sex_short"]
+
+Although ``replace`` is not a string method, it provides a convenient way
+to use mappings or vocabularies to translate certain values. It requires
+a ``dictionary`` to define the mapping ``{from : to}``.
+
+.. raw:: html
+
+
+
+
+.. warning::
+ There is also a ``str.replace`` method available to replace a
+ specific set of characters. However, when having a mapping of multiple
+ values, this would become:
+
+ ::
+
+ titanic["Sex_short"] = titanic["Sex"].str.replace("female", "F")
+ titanic["Sex_short"] = titanic["Sex_short"].str.replace("male", "M")
+
+ This would become cumbersome and easily lead to mistakes. Just think (or
+ try it out yourself) what would happen if those two statements are applied
+ in the opposite order…
+
+.. raw:: html
+
+
+
REMEMBER
+
+- String methods are available using the ``str`` accessor.
+- String methods work element wise and can be used for conditional
+ indexing.
+- The ``replace`` method is a convenient method to convert values
+ according to a given dictionary.
+
+.. raw:: html
+
+
+
+.. raw:: html
+
+
+ To user guide
+
+More information on string methods is given in :ref:`text`.
+
+.. raw:: html
+
+
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
index 97ce7eb4e7e52..4254852d51a85 100644
--- a/doc/source/getting_started/intro_tutorials/03_subset_data.rst
+++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
@@ -43,7 +43,7 @@ consists of the following data columns:
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
index 243ce4735eef4..6dace72fc11a4 100644
--- a/doc/source/getting_started/intro_tutorials/04_plotting.rst
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -34,7 +34,7 @@ in respectively Paris, Antwerp and London.
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
index 1f3767d362b73..b8b50fdde118e 100644
--- a/doc/source/getting_started/intro_tutorials/05_add_columns.rst
+++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
@@ -33,7 +33,7 @@ in respectively Paris, Antwerp and London.
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
index f9d7bc34a69bb..ad3e34f298164 100644
--- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
@@ -43,7 +43,7 @@ consists of the following data columns:
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
index 934811d5fae1b..ef44fc5cee44a 100644
--- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -43,7 +43,7 @@ consists of the following data columns:
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
index 42e3abe83d75c..6c951bced4ba3 100644
--- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
@@ -34,7 +34,7 @@ Westminster* in respectively Paris, Antwerp and London.
.. raw:: html
- To raw data
+ To raw data
@@ -68,7 +68,7 @@ Westminster* in respectively Paris, Antwerp and London.
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
index 087369e6ce069..edb259a74e923 100644
--- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -35,7 +35,7 @@ Westminster* in respectively Paris, Antwerp and London.
.. raw:: html
- To raw data
+ To raw data
diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst
index dd5fc5dcbb08e..3097a22be91eb 100644
--- a/doc/source/getting_started/intro_tutorials/10_text_data.rst
+++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst
@@ -43,7 +43,7 @@ consists of the following data columns:
.. raw:: html
- To raw data
+ To raw data
From aa46ef4f79f6b1a7b5750dac760f263c080e6a2a Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:12:14 +0100
Subject: [PATCH 03/47] Move reference to top of title
---
doc/source/user_guide/text.rst | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index 072871f89bdae..809cc79524831 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -177,12 +177,11 @@ and replacing any remaining whitespaces with underscores:
Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few
exceptions, other uses are not supported, and may be disabled at a later point.
+.. _text.split:
Splitting and replacing strings
-------------------------------
-.. _text.split:
-
Methods like ``split`` return a Series of lists:
.. ipython:: python
From 351a523055ce0cc214f454a2ec3d5cea6ca478c2 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:16:12 +0100
Subject: [PATCH 04/47] Fix minor mistakes in text
---
.../getting_started/intro_tutorials/08_combine_dataframes.rst | 4 ++--
doc/source/getting_started/intro_tutorials/09_timeseries.rst | 3 ++-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
index 6c951bced4ba3..55b2db0654b30 100644
--- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
@@ -61,7 +61,7 @@ matter less than 2.5 micrometers is used, made available by
`openaq `__ and downloaded using the
`py-openaq `__ package.
-The ``air_quality_pm25_long.csv`` data set provides :math:`pm25`
+The ``air_quality_pm25_long.csv`` data set provides :math:`PM_{25}`
values for the measurement stations *FR04014*, *BETR801* and *London
Westminster* in respectively Paris, Antwerp and London.
@@ -100,7 +100,7 @@ Concatenating objects
-I want to combine the measurements of :math:`NO_2` and :math:`pm_{25}`, two tables with a similar structure, in a single table
+I want to combine the measurements of :math:`NO_2` and :math:`PM_{25}`, two tables with a similar structure, in a single table
.. ipython:: python
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
index edb259a74e923..c1b2378c7de83 100644
--- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -211,7 +211,8 @@ Plot the typical :math:`NO_2` pattern during the day of our time series of all s
Similar to the previous case, we want to calculate a given statistic
(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
-split-apply-combine approach again.
+split-apply-combine approach again. For this case, the datetime property ``hour``
+of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor.
.. raw:: html
From 68409058c419ce0c19c6ec80469a86eb45cea479 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:17:10 +0100
Subject: [PATCH 05/47] Add missing reference to user guide section
---
.../intro_tutorials/07_reshape_table_layout.rst | 3 +--
doc/source/user_guide/reshaping.rst | 2 ++
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
index ef44fc5cee44a..fa2eb49628839 100644
--- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -294,8 +294,7 @@ For more information about ``pivot_table``, see :ref:`reshaping.pivot`.
To user guide
-Have a look at ``groupby`` in combination with ``unstack`` at
-`:ref:``TODO LABEL`` `__
+Have a look at ``groupby`` in combination with ``unstack`` at :ref:`reshaping.combine_with_groupby`.
.. raw:: html
diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
index 8583a9312b690..73903cca6546d 100644
--- a/doc/source/user_guide/reshaping.rst
+++ b/doc/source/user_guide/reshaping.rst
@@ -314,6 +314,8 @@ user-friendly.
dft
pd.wide_to_long(dft, ["A", "B"], i="id", j="year")
+.. _reshaping.combine_with_groupby:
+
Combining with stats and GroupBy
--------------------------------
From bf1981745445600615acc974c698f35613f10b27 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:28:38 +0100
Subject: [PATCH 06/47] Add missing reference to user guide section
---
doc/source/getting_started/dsintro.rst | 1 +
doc/source/getting_started/intro_tutorials/05_add_columns.rst | 4 +---
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst
index a07fcbd8b67c4..1a0fed8fc4b9b 100644
--- a/doc/source/getting_started/dsintro.rst
+++ b/doc/source/getting_started/dsintro.rst
@@ -444,6 +444,7 @@ dtype. For example:
data
pd.DataFrame.from_records(data, index='C')
+.. _basics.dataframe.sel_add_del:
Column selection, addition, deletion
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
index b8b50fdde118e..da49bdbaba511 100644
--- a/doc/source/getting_started/intro_tutorials/05_add_columns.rst
+++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
@@ -177,9 +177,7 @@ Further details about column or row label renaming is provided in :ref:`basics.r
To user guide
-Further details about column addition and deletion is provided in :ref:``TODO``
-(`label `__
-to add in sphinx)
+Further details about column addition and deletion is provided in :ref:`basics.dataframe.sel_add_del`.
.. raw:: html
From 49858f91ef925ab47a02504b830dc7a93bce70cd Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:56:59 +0100
Subject: [PATCH 07/47] Remove created artifacts
---
.../getting_started/intro_tutorials/02_read_write.rst | 6 ++++++
doc/source/getting_started/intro_tutorials/04_plotting.rst | 6 ++++++
2 files changed, 12 insertions(+)
diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst
index 03e94b36ac5f5..0f9848dfa5196 100644
--- a/doc/source/getting_started/intro_tutorials/02_read_write.rst
+++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst
@@ -147,6 +147,12 @@ the data as an excel file. In the example here, the ``sheet_name`` is
named *passengers* instead of the default *Sheet1*. By setting
``index=False`` the row index labels are not saved in the spreadsheet.
+.. ipython:: python
+ :suppress:
+
+ import os
+ os.remove('titanic.xlsx')
+
.. raw:: html
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
index 6dace72fc11a4..0683a8f55d124 100644
--- a/doc/source/getting_started/intro_tutorials/04_plotting.rst
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -200,6 +200,12 @@ I want to further customize, extend or save the resulting plot.
axs.set_ylabel("NO$_2$ concentration");
fig.savefig("no2_concentrations.png")
+.. ipython:: python
+ :suppress:
+
+ import os
+ os.remove('no2_concentrations.png')
+
.. raw:: html
From 6ca5e595dd6a7ea0cc734668013080e0b7586c36 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:57:31 +0100
Subject: [PATCH 08/47] Update link to raw data
---
.../intro_tutorials/07_reshape_table_layout.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
index fa2eb49628839..2d0d862940e9b 100644
--- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -68,7 +68,7 @@ micrometers, made available by
`openaq `__ and using the
`py-openaq `__ package.
The ``air_quality_long.csv`` data set provides :math:`NO_2` and
-:math:`pm25` values for the measurement stations *FR04014*, *BETR801*
+:math:`PM_{25}` values for the measurement stations *FR04014*, *BETR801*
and *London Westminster* in respectively Paris, Antwerp and London.
The air-quality data set has the following columns:
@@ -95,7 +95,7 @@ measurement.
.. raw:: html
- To raw data
+ To raw data
From a40fff8ca5ef454167c446e12acd4d63cd52d069 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 09:58:20 +0100
Subject: [PATCH 09/47] Change title of text tutorial
---
doc/source/getting_started/intro_tutorials/10_text_data.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst
index 3097a22be91eb..cb26b1252eed6 100644
--- a/doc/source/getting_started/intro_tutorials/10_text_data.rst
+++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst
@@ -58,8 +58,8 @@ consists of the following data columns:
-How to manipulate data columns with textual data?
--------------------------------------------------
+How to manipulate textual data?
+-------------------------------
.. raw:: html
From 89aa1544afe93b8a0afcb7ff04d9e9e19a82ab31 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 10:36:58 +0100
Subject: [PATCH 10/47] Change title of tutorial
---
doc/source/getting_started/intro_tutorials/03_subset_data.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
index 4254852d51a85..cd8278d83048b 100644
--- a/doc/source/getting_started/intro_tutorials/03_subset_data.rst
+++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
@@ -58,7 +58,7 @@ consists of the following data columns:
-How do I select a subset of data in a ``DataFrame``?
+How do I select a subset of a ``DataFrame``?
====================================================
How do I select specific columns from a ``DataFrame``?
From 2cff6f8ae24ce9db2399d26e32c68f0abcb01020 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 10:37:13 +0100
Subject: [PATCH 11/47] Update
---
doc/source/index.rst.template | 1 +
1 file changed, 1 insertion(+)
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 9ec330c956ff1..5eb65069c3396 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -57,6 +57,7 @@ See the :ref:`overview` for more detail about what's in the library.
* :doc:`getting_started/install`
* :doc:`getting_started/overview`
* :doc:`getting_started/10min`
+ * :doc:`getting_started/intro_tutorials/index`
* :doc:`getting_started/basics`
* :doc:`getting_started/dsintro`
* :doc:`getting_started/comparison/index`
From 3b12e776a166df91d606b1d1c7c9fcdef10c29cf Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 25 Nov 2019 22:55:16 +0100
Subject: [PATCH 12/47] Add draft index page of getting started
---
doc/source/getting_started/index.rst | 182 +++++++++++++++++++++++++++
1 file changed, 182 insertions(+)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 1a8cb242a110e..7bc6333904c9b 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -6,6 +6,188 @@
Getting started
===============
+Installation
+------------
+
+Before you can use pandas, you’ll need to get it installed.
+
+.. raw:: html
+
+
+
+
+
+
+ Working with conda?
+
+
+
+
+Pandas is part of the `Anaconda `__ distribution and can be
+installed with Anaconda or Miniconda:
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
+ Prefer pip?
+
+
+
+
+Pandas can be installed via pip from `PyPI `__.
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
+ In-depth instructions?
+
+
+
Installing a specific version?
+ Installing from source?
+ Check the advanced installation page.
+
+.. container:: custom-button
+
+ :ref:`Learn more `
+
+.. raw:: html
+
+
+
+
+
+
+
+.. _gentle_intro:
+
+Intro to pandas
+---------------
+
+TODO
+
+
+.. _comingfrom:
+
+Coming from...
+--------------
+
+Currently working with other software for data manipulation in a tabular format? You're probably familiar with typical
+data operations and know *what* to do with your tabular data, but lack the syntax to execute these operations. Get to know
+the pandas syntax by looking for equivalents from the software you already know:
+
+.. raw:: html
+
+
+
+
+
+
+
+
The R programming language provides the data.frame data structure and multiple packages,
+ such as tidyverse use and extend data.frames for convenient data handling
+ functionalities similar to pandas.
+
+.. container:: custom-button
+
+ :ref:`Learn more `
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
Already familiar to SELECT, GROUP BY, JOIN,...?
+ Most of these SQL manipulations do have equivalents in pandas.
+
+.. container:: custom-button
+
+ :ref:`Learn more `
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
The data set included in the
+ STATA statistical software suite corresponds
+ to the pandas data.frame. Many of the operations known from STATA have an equivalent
+ in pandas.
+
+.. container:: custom-button
+
+ :ref:`Learn more `
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
The SAS statistical software suite
+ also provides the data set corresponding to the pandas data.frame.
+ Also vectorized operations, filtering, string processing operations,... from SAS have similar
+ functions in pandas.
+
+.. container:: custom-button
+
+ :ref:`Learn more `
+
+.. raw:: html
+
+
The R programming language provides the data.frame data structure and multiple packages,
such as tidyverse use and extend data.frames for convenient data handling
@@ -128,7 +128,7 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
Already familiar to SELECT, GROUP BY, JOIN,...?
Most of these SQL manipulations do have equivalents in pandas.
@@ -144,7 +144,7 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
The data set included in the
STATA statistical software suite corresponds
@@ -162,7 +162,7 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
The SAS statistical software suite
also provides the data set corresponding to the pandas data.frame.
@@ -193,6 +193,7 @@ material is enlisted in the community contributed :ref:`tutorials`.
.. toctree::
:maxdepth: 2
+ :hidden:
install
overview
From a2f2f819bb93cfaff80e1403d06253496b921022 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Tue, 26 Nov 2019 23:25:20 +0100
Subject: [PATCH 16/47] Fix toctree on getting started page
---
doc/source/index.rst.template | 2 ++
1 file changed, 2 insertions(+)
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 5eb65069c3396..1ef6ebdb3d4d9 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -26,6 +26,7 @@ See the :ref:`overview` for more detail about what's in the library.
{% if single_doc and single_doc.endswith('.rst') -%}
.. toctree::
:maxdepth: 3
+ :titlesonly:
{{ single_doc[:-4] }}
{% elif single_doc %}
@@ -37,6 +38,7 @@ See the :ref:`overview` for more detail about what's in the library.
.. toctree::
:maxdepth: 3
:hidden:
+ :titlesonly:
{% endif %}
{% if not single_doc %}
What's New in 1.0.0
From d45040cbb987e19caaa322779dad8bc68c7db82f Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 13 Jan 2020 12:19:25 +0100
Subject: [PATCH 17/47] Add pandas colors to color cycle
---
doc/source/getting_started/intro_tutorials/04_plotting.rst | 3 +++
1 file changed, 3 insertions(+)
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
index 0683a8f55d124..78e4047f4e0d0 100644
--- a/doc/source/getting_started/intro_tutorials/04_plotting.rst
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -6,6 +6,9 @@
import pandas as pd
import matplotlib.pyplot as plt
+ # use the pandas main colors
+ import matplotlib as mpl
+ mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#150458", "#FFCA00", "#E70488"])
.. raw:: html
From 147551c5fe2a65d0fc93686bbd157b396722b876 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 13 Jan 2020 22:06:04 +0100
Subject: [PATCH 18/47] provide blueprint to intro tutorials
---
doc/source/_static/css/getting_started.css | 60 +++++++++++++-
doc/source/getting_started/index.rst | 92 +++++++++++++++++++++-
2 files changed, 150 insertions(+), 2 deletions(-)
diff --git a/doc/source/_static/css/getting_started.css b/doc/source/_static/css/getting_started.css
index 75b59401d28dc..3ecf6ed02e52a 100644
--- a/doc/source/_static/css/getting_started.css
+++ b/doc/source/_static/css/getting_started.css
@@ -190,4 +190,62 @@ ul.task-bullet > li > p:first-child {
margin-top: 0;
margin-bottom: 0rem;
color: #484848;
-}
\ No newline at end of file
+}
+
+/* intro to tutorial collapsed cards */
+
+.tutorial-accordion {
+ margin-top: 20px;
+ margin-bottom: 20px;
+}
+
+.tutorial-card .card-header.card-link .btn {
+ margin-right: 12px;
+}
+
+.tutorial-card .card-header.card-link .btn:after {
+ content: "-";
+}
+
+.tutorial-card .card-header.card-link.collapsed .btn:after {
+ content: "+";
+}
+
+.tutorial-card-header-1 {
+ justify-content: space-between;
+ align-items: center;
+}
+
+.tutorial-card-header-2 {
+ justify-content: flex-start;
+ align-items: center;
+ font-size: 1.3rem;
+}
+
+.tutorial-card .card-header {
+ cursor: pointer;
+ background-color: white;
+}
+
+.tutorial-card .card-body {
+ background-color: #F0F0F0;
+}
+
+.tutorial-card .badge {
+ background-color: #130654;
+ margin: 10px 10px 10px 10px;
+ padding: 5px;
+}
+
+.tutorial-card .gs-badge-link p {
+ margin: 0px;
+}
+
+.tutorial-card .gs-badge-link a {
+ color: white;
+ text-decoration: none;
+}
+
+.tutorial-card .badge:hover {
+ background-color: grey;
+}
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 84ce1e3b1001f..de5c7ef179a0f 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -93,7 +93,97 @@ Pandas can be installed via pip from `PyPI `__.
Intro to pandas
---------------
-TODO
+.. raw:: html
+
+
+
+
+
+
+
+
+
+ What kind of data does Pandas handle?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_01_tableoriented>`
+
+.. raw:: html
+
+
+
+
+
+
+
+When working with tabular data, such as data stored in spreadsheets or databases, Pandas is the right tool for you. Pandas will help you
+to explore, clean and process your data. In Pandas, a data table is called a :class:`DataFrame`.
+
+.. image:: ../../_static/schemas/01_table_dataframe.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_01_tableoriented>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
+.. raw:: html
+
+
+
+ Filtering data based on the position in a table, the index name or a conditional expression.
+
+
+
+
+
+
+
.. _comingfrom:
From 3dfa4cdafb6bd4b056ad021b99bd449e5983a486 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 13 Jan 2020 22:07:02 +0100
Subject: [PATCH 19/47] Add first update to general doc page
---
doc/source/index.rst.template | 43 +++++++++++++++++++++++++++--------
1 file changed, 33 insertions(+), 10 deletions(-)
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 671df8e4fabac..0eebce0915727 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -1,27 +1,50 @@
+:notoc:
+
.. pandas documentation master file, created by
.. module:: pandas
-*********************************************
-pandas: powerful Python data analysis toolkit
-*********************************************
+********************
+pandas documentation
+********************
**Date**: |today| **Version**: |version|
**Download documentation**: `PDF Version `__ | `Zipped HTML `__
-**Useful links**:
-`Binary Installers `__ |
-`Source Repository `__ |
+:mod:`pandas` is an open source, BSD-licensed library providing high-performance,
+easy-to-use data structures and data analysis tools for the `Python `__
+programming language.
+
+----------------------------------
+How the documentation is organized
+----------------------------------
+
+Pandas has a lot of documentation. A high-level overview of how it’s organized will help you know where to look for certain things:
+
+TODO -> 4 blokken (cfr. coming from)
+
+- :ref:`getting_started` contains a :ref:`_gentle_intro` first introduction to some of the main pandas core concepts. Furthermore, it contains an overview of
+:ref:`community_guides` community provided tutorials.
+- :ref:`user_guide` provides in-depth information on the key concepts of pandas with useful background information and explanation.
+- :ref:`api` contains a technical reference of the pandas API and available functionalities. The reference describes how the methods work and which parameters
+can be used. It assumes that you have an understanding of the key concepts.
+- Saw a typo in the documentation? Want to improve existing functionalities. The :ref:`development` guidelines will guide you through the process of setting up a development environment.
+
+------------
+Getting help
+------------
+
`Issues & Ideas `__ |
`Q&A Support `__ |
`Mailing List `__
-:mod:`pandas` is an open source, BSD-licensed library providing high-performance,
-easy-to-use data structures and data analysis tools for the `Python `__
-programming language.
+-----------------------------
+pandas documentation overview
+-----------------------------
-See the :ref:`overview` for more detail about what's in the library.
+`Binary Installers `__ |
+`Source Repository `__ |
{% if single_doc and single_doc.endswith('.rst') -%}
.. toctree::
From 9f413474c38227ac929dfae626fb44b0d2bdf4c8 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 13 Jan 2020 22:07:15 +0100
Subject: [PATCH 20/47] Fix minor styling elements
---
.../getting_started/intro_tutorials/01_table_oriented.rst | 4 ++--
doc/source/getting_started/intro_tutorials/03_subset_data.rst | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
index ea9a50987400c..bacc1b98e0bf9 100644
--- a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
@@ -2,8 +2,8 @@
{{ header }}
-Pandas is table oriented
-------------------------
+What kind of data does Pandas handle?
+=====================================
.. raw:: html
diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
index cd8278d83048b..a0d0977479882 100644
--- a/doc/source/getting_started/intro_tutorials/03_subset_data.rst
+++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst
@@ -59,7 +59,7 @@ consists of the following data columns:
How do I select a subset of a ``DataFrame``?
-====================================================
+============================================
How do I select specific columns from a ``DataFrame``?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
From 80c961555bd2550928c836ce971ab0c41e41dede Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 13 Jan 2020 22:31:02 +0100
Subject: [PATCH 21/47] Add section 2 and 3 short intro
---
doc/source/getting_started/index.rst | 84 ++++++++++++++++++++++++----
1 file changed, 72 insertions(+), 12 deletions(-)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index de5c7ef179a0f..2a92282151607 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -97,8 +97,8 @@ Intro to pandas
-
+
@@ -145,7 +145,6 @@ to explore, clean and process your data. In Pandas, a data table is called a :cl
-
@@ -153,16 +152,45 @@ to explore, clean and process your data. In Pandas, a data table is called a :cl
How do I read and write tabular data?
+
+
+:ref:`Straight to tutorial...<10min_tut_02_read_write>`
+
+.. raw:: html
+
+
+
- Reading from and writing to different formats is built-in.
+
+Pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). Importing data from each of these
+data sources is provided by functions with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data.
+
+.. image:: ../../_static/schemas/02_io_readwrite.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_02_read_write>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
+.. raw:: html
+
+
+
-
-
+
@@ -171,15 +199,47 @@ to explore, clean and process your data. In Pandas, a data table is called a :cl
How do I select a subset of a table?
+
+
+:ref:`Straight to tutorial...<10min_tut_03_subset>`
+
+.. raw:: html
+
+
+
-
+
- Filtering data based on the position in a table, the index name or a conditional expression.
+
+Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the
+data you need are available in Pandas.
+
+.. image:: ../../_static/schemas/03_subset_columns_rows.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_03_subset>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
+.. raw:: html
+
+
+
-
+
+
+
From cd446f271541161d423d8bb33cb00690b946571c Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 10:09:16 +0100
Subject: [PATCH 22/47] Add plot intro
---
doc/source/getting_started/index.rst | 47 ++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 2a92282151607..e5ea898fe70a9 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -231,6 +231,53 @@ data you need are available in Pandas.
:ref:`To user guide `
+.. raw:: html
+
+
+
+
+
+
+
+
+
+
+
+
+ How to create plots in pandas?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_04_plotting>`
+
+.. raw:: html
+
+
+
+
+
+
+
+Pandas provides plotting of your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...)
+corresponding to your data.
+
+.. image:: ../../_static/schemas/04_plot_overview.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_04_plotting>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
.. raw:: html
From 38287498aff8ea29c682235189e42d9b3ae6d2d3 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 10:22:34 +0100
Subject: [PATCH 23/47] Add dataframe calc intro
---
doc/source/getting_started/index.rst | 47 ++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index e5ea898fe70a9..d90a934c9555f 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -278,6 +278,53 @@ corresponding to your data.
:ref:`To user guide `
+.. raw:: html
+
+
+
+
+
+
+
+
+
+
+
+
+ How to create new columns derived from existing columns?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_05_columns>`
+
+.. raw:: html
+
+
+
+
+
+
+
+There is no need to loop over all rows of your data table to do calculations, data manipulations on a column work elementwise.
+Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward.
+
+.. image:: ../../_static/schemas/05_newcolumn_2.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_05_columns>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
.. raw:: html
From 05f4811188e86e5ec6051a9dc2b244e6c601c2e5 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 11:02:05 +0100
Subject: [PATCH 24/47] Add groupby intro
---
doc/source/getting_started/index.rst | 49 ++++++++++++++++++++++++++--
1 file changed, 47 insertions(+), 2 deletions(-)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index d90a934c9555f..27a7069dce500 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -305,7 +305,7 @@ corresponding to your data.
-There is no need to loop over all rows of your data table to do calculations, data manipulations on a column work elementwise.
+There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work elementwise.
Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward.
.. image:: ../../_static/schemas/05_newcolumn_2.svg
@@ -323,7 +323,7 @@ Adding a column to a :class:`DataFrame` based on existing data in other columns
-:ref:`To user guide `
+:ref:`To user guide `
.. raw:: html
@@ -333,7 +333,52 @@ Adding a column to a :class:`DataFrame` based on existing data in other columns
+
+
+
+
+
+ How to calculate summary statistics?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_06_stats>`
+.. raw:: html
+
+
+
+
+
+
+
+Basic statistics (mean, median, min, max, counts...) are easily calculable. These statistics can be applied on the entire
+data set, a sliding window of the data or grouped by categories. The latter is also known as the split-apply-combine approach.
+
+.. image:: ../../_static/schemas/06_groupby.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_06_stats>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
+.. raw:: html
+
+
+
+
+
+
From 809248dcc7467fedd7606d6d9fc0820d0c938ec3 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 11:21:26 +0100
Subject: [PATCH 25/47] Add reshape intro
---
doc/source/getting_started/index.rst | 49 +++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 27a7069dce500..ccd964a59780c 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -352,7 +352,7 @@ Adding a column to a :class:`DataFrame` based on existing data in other columns
-Basic statistics (mean, median, min, max, counts...) are easily calculable. These statistics can be applied on the entire
+Basic statistics (mean, median, min, max, counts...) are easily calculable. These or custom aggregations can be applied on the entire
data set, a sliding window of the data or grouped by categories. The latter is also known as the split-apply-combine approach.
.. image:: ../../_static/schemas/06_groupby.svg
@@ -372,6 +372,53 @@ data set, a sliding window of the data or grouped by categories. The latter is a
:ref:`To user guide `
+.. raw:: html
+
+
+
+
+
+
+
+
+
+
+
+
+ How to reshape the layout of tables?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_07_reshape>`
+
+.. raw:: html
+
+
+
+
+
+
+
+Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
+from long to wide format. With aggregations built-in, a pivot table is created with a single command.
+
+.. image:: ../../_static/schemas/07_melt.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_07_reshape>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
.. raw:: html
From 55f64efb30da6690bbc2c8fc94b184bc272288c7 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 11:22:59 +0100
Subject: [PATCH 26/47] Fix wrong collapseble
---
doc/source/getting_started/index.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index ccd964a59780c..d15896c22c52a 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -396,7 +396,7 @@ data set, a sliding window of the data or grouped by categories. The latter is a
-
+
Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
From 864ac3e89653818a15aa94a0d19c25c54d99bb1e Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 11:37:21 +0100
Subject: [PATCH 27/47] Fix typos
---
.../getting_started/intro_tutorials/08_combine_dataframes.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
index 55b2db0654b30..f262265adfef8 100644
--- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
@@ -302,7 +302,7 @@ More information on join/merge of tables is provided in
REMEMBER
-- Mulitple tables can be concatentated both column as row wise using
+- Multiple tables can be concatenated both column as row wise using
the ``concat`` function.
- For database-like merging/joining of tables, use the ``merge``
function.
From b7de411f532318cfcf613f152753308169fc0d7c Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 11:39:23 +0100
Subject: [PATCH 28/47] Add combine table intro
---
doc/source/getting_started/index.rst | 46 ++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index d15896c22c52a..05c702932c1d4 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -419,6 +419,52 @@ from long to wide format. With aggregations built-in, a pivot table is created w
:ref:`To user guide `
+.. raw:: html
+
+
+
+
+
+
+
+
+
+
+
+
+ How to combine data from multiple tables?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_08_combine>`
+
+.. raw:: html
+
+
+
+
+
+
+
+Multiple tables can be concatenated both column-wise and row-wise, and database-like join/merge operations are provided to combine multiple tables of data.
+
+.. image:: ../../_static/schemas/08_concat_row.svg
+ :align: center
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_08_combine>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
.. raw:: html
From 1e777eb259380f2ca0f03a380589814984f59fe6 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 11:47:49 +0100
Subject: [PATCH 29/47] Add intro textual and time series data
---
doc/source/getting_started/index.rst | 87 +++++++++++++++++++++++++++-
1 file changed, 86 insertions(+), 1 deletion(-)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 05c702932c1d4..7073c832a8bb5 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -463,7 +463,50 @@ Multiple tables can be concatenated both column wise as row wise and database-li
-:ref:`To user guide `
+:ref:`To user guide `
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
+
+
+
+ How to handle time series data?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_09_timeseries>`
+
+.. raw:: html
+
+
+
+
+
+
+
+Pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data.
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_09_timeseries>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
.. raw:: html
@@ -473,6 +516,48 @@ Multiple tables can be concatenated both column wise as row wise and database-li
+
+
+
+
+
+ How to manipulate textual data?
+
+
+
+:ref:`Straight to tutorial...<10min_tut_10_text>`
+
+.. raw:: html
+
+
+
+
+
+
+
+Data sets do not only contain numerical data. Pandas provides a wide range of functions to clean textual data and extract useful information from it.
+
+.. raw:: html
+
+
+
+
+:ref:`To introduction tutorial <10min_tut_10_text>`
+
+.. raw:: html
+
+
+
+
+:ref:`To user guide `
+
+.. raw:: html
+
+
+
+
+
+
From 21776604a46160f9fdc0255303f24cbf9bca3bff Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 17:26:19 +0100
Subject: [PATCH 30/47] Reset index page
---
doc/source/index.rst.template | 46 ++++++++---------------------------
1 file changed, 10 insertions(+), 36 deletions(-)
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 11c5ba485d8c2..4ced92cbda81a 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -1,55 +1,31 @@
-:notoc:
-
.. pandas documentation master file, created by
.. module:: pandas
-********************
-pandas documentation
-********************
+*********************************************
+pandas: powerful Python data analysis toolkit
+*********************************************
**Date**: |today| **Version**: |version|
**Download documentation**: `PDF Version `__ | `Zipped HTML `__
-:mod:`pandas` is an open source, BSD-licensed library providing high-performance,
-easy-to-use data structures and data analysis tools for the `Python `__
-programming language.
-
-----------------------------------
-How the documentation is organized
-----------------------------------
-
-Pandas has a lot of documentation. A high-level overview of how it’s organized will help you know where to look for certain things:
-
-TODO -> 4 blokken (cfr. coming from)
-
-- :ref:`getting_started` contains a :ref:`_gentle_intro` first introduction to some of the main pandas core concepts. Furthermore, it contains an overview of
-:ref:`community_guides` community provided tutorials.
-- :ref:`user_guide` provides in-depth information on the key concepts of pandas with useful background information and explanation.
-- :ref:`api` contains a technical reference of the pandas API and available functionalities. The reference describes how the methods work and which parameters
-can be used. It assumes that you have an understanding of the key concepts.
-- Saw a typo in the documentation? Want to improve existing functionalities. The :ref:`development` guidelines will guide you through the process of setting up a development environment.
-
-------------
-Getting help
-------------
-
+**Useful links**:
+`Binary Installers `__ |
+`Source Repository `__ |
`Issues & Ideas `__ |
`Q&A Support `__ |
`Mailing List `__
------------------------------
-pandas documentation overview
------------------------------
+:mod:`pandas` is an open source, BSD-licensed library providing high-performance,
+easy-to-use data structures and data analysis tools for the `Python `__
+programming language.
-`Binary Installers `__ |
-`Source Repository `__ |
+See the :ref:`overview` for more detail about what's in the library.
{% if single_doc and single_doc.endswith('.rst') -%}
.. toctree::
:maxdepth: 3
- :titlesonly:
{{ single_doc[:-4] }}
{% elif single_doc %}
@@ -61,7 +37,6 @@ pandas documentation overview
.. toctree::
:maxdepth: 3
:hidden:
- :titlesonly:
{% endif %}
{% if not single_doc %}
What's New in 1.1.0
@@ -82,7 +57,6 @@ pandas documentation overview
* :doc:`getting_started/install`
* :doc:`getting_started/overview`
* :doc:`getting_started/10min`
- * :doc:`getting_started/intro_tutorials/index`
* :doc:`getting_started/basics`
* :doc:`getting_started/dsintro`
* :doc:`getting_started/comparison/index`
From 9534c755e3f87a90359063ff0da31fbfd480a991 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 17:27:00 +0100
Subject: [PATCH 31/47] Fix excel file read and removal order
---
.../intro_tutorials/02_read_write.rst | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst
index 0f9848dfa5196..d8f5810fa5ea7 100644
--- a/doc/source/getting_started/intro_tutorials/02_read_write.rst
+++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst
@@ -147,12 +147,6 @@ the data as an excel file. In the example here, the ``sheet_name`` is
named *passengers* instead of the default *Sheet1*. By setting
``index=False`` the row index labels are not saved in the spreadsheet.
-.. ipython:: python
- :suppress:
-
- import os
- os.remove('titanic.xlsx')
-
.. raw:: html
@@ -169,6 +163,12 @@ DataFrame:
titanic.head()
+.. ipython:: python
+ :suppress:
+
+ import os
+ os.remove('titanic.xlsx')
+
.. raw:: html
From 6290de5f15be2cb7c88d2d54b10af2677d0e52a2 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 19:58:10 +0100
Subject: [PATCH 32/47] Fix title handling toctree main index page
---
doc/source/index.rst.template | 1 +
1 file changed, 1 insertion(+)
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 4ced92cbda81a..6e66cec1e8127 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -26,6 +26,7 @@ See the :ref:`overview` for more detail about what's in the library.
{% if single_doc and single_doc.endswith('.rst') -%}
.. toctree::
:maxdepth: 3
+ :titlesonly:
{{ single_doc[:-4] }}
{% elif single_doc %}
From 79c6d694e1a3f2fda61e8b2751408800ebcffcb3 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 19:58:10 +0100
Subject: [PATCH 33/47] Fix title handling toctree main index page
---
doc/source/index.rst.template | 2 ++
1 file changed, 2 insertions(+)
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template
index 4ced92cbda81a..2bf4e72dd3bba 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -26,6 +26,7 @@ See the :ref:`overview` for more detail about what's in the library.
{% if single_doc and single_doc.endswith('.rst') -%}
.. toctree::
:maxdepth: 3
+ :titlesonly:
{{ single_doc[:-4] }}
{% elif single_doc %}
@@ -37,6 +38,7 @@ See the :ref:`overview` for more detail about what's in the library.
.. toctree::
:maxdepth: 3
:hidden:
+ :titlesonly:
{% endif %}
{% if not single_doc %}
What's New in 1.1.0
From 247e6a03f3a047e270f9279ee2433f4d38d54ddf Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 20:16:37 +0100
Subject: [PATCH 34/47] Remove trailing whitespaces in rst files
---
doc/source/getting_started/index.rst | 272 +++++++++---------
.../intro_tutorials/01_table_oriented.rst | 18 +-
.../intro_tutorials/02_read_write.rst | 16 +-
.../intro_tutorials/03_subset_data.rst | 32 +--
.../intro_tutorials/04_plotting.rst | 22 +-
.../intro_tutorials/05_add_columns.rst | 14 +-
.../06_calculate_statistics.rst | 16 +-
.../07_reshape_table_layout.rst | 34 +--
.../intro_tutorials/08_combine_dataframes.rst | 26 +-
.../intro_tutorials/09_timeseries.rst | 18 +-
.../intro_tutorials/10_text_data.rst | 18 +-
11 files changed, 243 insertions(+), 243 deletions(-)
diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst
index 7073c832a8bb5..98eba78492563 100644
--- a/doc/source/getting_started/index.rst
+++ b/doc/source/getting_started/index.rst
@@ -23,15 +23,15 @@ Before you can use pandas, you’ll need to get it installed.
-Pandas is part of the `Anaconda `__ distribution and can be
+Pandas is part of the `Anaconda `__ distribution and can be
installed with Anaconda or Miniconda:
-
+
.. raw:: html
-
Installing a specific version?
- Installing from source?
+
Installing a specific version?
+ Installing from source?
Check the advanced installation page.
.. container:: custom-button
-
+
:ref:`Learn more `
.. raw:: html
@@ -96,10 +96,10 @@ Intro to pandas
.. raw:: html
-
+
-
-
+
+
@@ -109,15 +109,15 @@ Intro to pandas
:ref:`Straight to tutorial...<10min_tut_01_tableoriented>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-When working with tabular data, such as data stored in spreadsheets or databases, Pandas is the right tool for you. Pandas will help you
+When working with tabular data, such as data stored in spreadsheets or databases, Pandas is the right tool for you. Pandas will help you
to explore, clean and process your data. In Pandas, a data table is called a :class:`DataFrame`.
.. image:: ../../_static/schemas/01_table_dataframe.svg
@@ -141,12 +141,12 @@ to explore, clean and process your data. In Pandas, a data table is called a :cl
-
-
-
+
+
+
-
-
+
+
@@ -156,13 +156,13 @@ to explore, clean and process your data. In Pandas, a data table is called a :cl
:ref:`Straight to tutorial...<10min_tut_02_read_write>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
Pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). Importing data from each of these
data sources is provided by functions with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data.
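As a minimal, self-contained sketch (the file name ``example.csv`` is made up for illustration), a table is written with a ``to_*`` method and read back with the matching ``read_*`` function.

.. ipython:: python

    import os
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    df.to_csv("example.csv", index=False)  # store the table as CSV
    pd.read_csv("example.csv")             # ...and load it back again
    os.remove("example.csv")               # clean up the temporary file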
@@ -188,12 +188,12 @@ data sources is provided by function with the prefix ``read_*``. Similarly, the
-
-
+
+
-
-
+
+
@@ -203,15 +203,15 @@ data sources is provided by function with the prefix ``read_*``. Similarly, the
:ref:`Straight to tutorial...<10min_tut_03_subset>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the
+Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the
data you need are available in Pandas.
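A minimal sketch with a hand-made table (not the titanic data used in the tutorial itself): columns are selected with ``[]`` and rows are filtered with a boolean condition.

.. ipython:: python

    import pandas as pd

    passengers = pd.DataFrame({"Name": ["Braund", "Allen", "Bonnell"],
                               "Age": [22, 35, 58]})
    passengers["Age"]                               # a single column (a Series)
    passengers[passengers["Age"] > 30]              # rows matching a condition
    passengers.loc[passengers["Age"] > 30, "Name"]  # rows and columns at once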
.. image:: ../../_static/schemas/03_subset_columns_rows.svg
@@ -235,12 +235,12 @@ data you need are available in Pandas.
-
-
+
+
-
-
+
+
@@ -250,15 +250,15 @@ data you need are available in Pandas.
:ref:`Straight to tutorial...<10min_tut_04_plotting>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-Pandas provides plotting your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...)
+Pandas provides plotting of your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...)
corresponding to your data.
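A minimal sketch, assuming Matplotlib is installed and using a small hand-made table instead of the air quality data.

.. ipython:: python

    import pandas as pd
    import matplotlib.pyplot as plt

    air = pd.DataFrame({"station_paris": [24.4, 27.4, 27.0],
                        "station_london": [23.0, 19.0, 19.5]})
    air.plot()      # line plot of every numerical column
    air.plot.box()  # or pick another plot type, e.g. a boxplot
    plt.show()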
.. image:: ../../_static/schemas/04_plot_overview.svg
@@ -282,12 +282,12 @@ corresponding to your data.
-
-
+
+
-
-
+
+
@@ -297,15 +297,15 @@ corresponding to your data.
:ref:`Straight to tutorial...<10min_tut_05_columns>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work elementwise.
+There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work elementwise.
Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward.
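As a minimal sketch (column names are made up for illustration), a new column is created by assigning the result of an element-wise expression.

.. ipython:: python

    import pandas as pd

    air = pd.DataFrame({"station_paris": [24.4, 27.4],
                        "station_antwerp": [20.0, 22.5]})
    # the division is applied element-wise, row by row, without an explicit loop
    air["ratio_paris_antwerp"] = air["station_paris"] / air["station_antwerp"]
    air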
.. image:: ../../_static/schemas/05_newcolumn_2.svg
@@ -329,12 +329,12 @@ Adding a column to a :class:`DataFrame` based on existing data in other columns
-
-
+
+
-
-
+
+
@@ -344,15 +344,15 @@ Adding a column to a :class:`DataFrame` based on existing data in other columns
:ref:`Straight to tutorial...<10min_tut_06_stats>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-Basic statistics (mean, median, min, max, counts...) are easily calculable. These or custom aggregations can be applied on the entire
+Basic statistics (mean, median, min, max, counts...) are easily calculable. These or custom aggregations can be applied on the entire
data set, a sliding window of the data or grouped by categories. The latter is also known as the split-apply-combine approach.
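A minimal sketch with a hand-made table (not the titanic data): a statistic can be taken over a whole column, or per category with the split-apply-combine ``groupby``.

.. ipython:: python

    import pandas as pd

    people = pd.DataFrame({"Sex": ["male", "female", "male"],
                           "Age": [22, 38, 26]})
    people["Age"].mean()                 # statistic over the full column
    people.groupby("Sex")["Age"].mean()  # split-apply-combine per category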
.. image:: ../../_static/schemas/06_groupby.svg
@@ -376,12 +376,12 @@ data set, a sliding window of the data or grouped by categories. The latter is a
-
-
+
+
-
-
+
+
@@ -391,15 +391,15 @@ data set, a sliding window of the data or grouped by categories. The latter is a
:ref:`Straight to tutorial...<10min_tut_07_reshape>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
+Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
from long to wide format. With aggregations built-in, a pivot table is created with a single command.
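A minimal sketch with made-up station columns, going from wide to long with ``melt`` and back to wide (with aggregation) using ``pivot_table``.

.. ipython:: python

    import pandas as pd

    wide = pd.DataFrame({"date": ["2019-05-07", "2019-05-08"],
                         "paris": [24.4, 27.4],
                         "antwerp": [20.0, 22.5]})
    tidy = wide.melt(id_vars="date", var_name="station", value_name="no2")
    tidy
    tidy.pivot_table(values="no2", index="date",
                     columns="station", aggfunc="mean")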
.. image:: ../../_static/schemas/07_melt.svg
@@ -423,12 +423,12 @@ from long to wide format. With aggregations built-in, a pivot table is created w
-
-
+
+
-
-
+
+
@@ -438,13 +438,13 @@ from long to wide format. With aggregations built-in, a pivot table is created w
:ref:`Straight to tutorial...<10min_tut_08_combine>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
Multiple tables can be concatenated both column-wise and row-wise, and database-like join/merge operations are provided to combine multiple tables of data.
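As a minimal sketch (tables and station codes are made up), ``concat`` stacks tables with the same columns and ``merge`` joins tables on a common key.

.. ipython:: python

    import pandas as pd

    q1 = pd.DataFrame({"location": ["FR04014"], "value": [24.4]})
    q2 = pd.DataFrame({"location": ["BETR801"], "value": [20.0]})
    coords = pd.DataFrame({"location": ["FR04014", "BETR801"],
                           "city": ["Paris", "Antwerp"]})
    stacked = pd.concat([q1, q2])                         # row-wise concatenation
    pd.merge(stacked, coords, how="left", on="location")  # database-style join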
@@ -469,12 +469,12 @@ Multiple tables can be concatenated both column wise as row wise and database-li
-
-
+
+
-
-
+
+
@@ -484,15 +484,15 @@ Multiple tables can be concatenated both column wise as row wise and database-li
:ref:`Straight to tutorial...<10min_tut_09_timeseries>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
-Pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data.
+Pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data.
.. raw:: html
@@ -512,12 +512,12 @@ Pandas has great support for time series and has an extensive set of tools for w
-
-
+
+
-
-
+
+
@@ -527,13 +527,13 @@ Pandas has great support for time series and has an extensive set of tools for w
:ref:`Straight to tutorial...<10min_tut_10_text>`
-.. raw:: html
-
+.. raw:: html
+
-
-
-
-
+
+
+
+
Data sets do not only contain numerical data. Pandas provides a wide range of functions to clean textual data and extract useful information from it.
@@ -555,11 +555,11 @@ Data sets do not only contain numerical data. Pandas provides a wide range of fu
-
-
+
+
-
+
@@ -578,14 +578,14 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
-
The R programming language provides the data.frame data structure and multiple packages,
- such as tidyverse use and extend data.frames for convenient data handling
+
The R programming language provides the data.frame data structure and multiple packages,
+ such as tidyverse use and extend data.frames for convenient data handling
functionalities similar to pandas.
.. container:: custom-button
-
+
:ref:`Learn more `
.. raw:: html
@@ -595,9 +595,9 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
-
Already familiar to SELECT, GROUP BY, JOIN,...?
+
Already familiar with SELECT, GROUP BY, JOIN,...?
Most of these SQL manipulations do have equivalents in pandas.
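As an illustrative sketch (the table and query are made up, not taken from the comparison guide), a SQL ``WHERE`` clause maps onto boolean indexing.

.. ipython:: python

    import pandas as pd

    tips = pd.DataFrame({"day": ["Sun", "Mon", "Sun"], "tip": [1.0, 2.0, 3.5]})
    # SQL: SELECT * FROM tips WHERE tip > 1.5;
    tips[tips["tip"] > 1.5]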
.. container:: custom-button
@@ -611,15 +611,15 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
-
The data set included in the
- STATA statistical software suite corresponds
+
The data set included in the
+ STATA statistical software suite corresponds
to the pandas data.frame. Many of the operations known from STATA have an equivalent
in pandas.
.. container:: custom-button
-
+
:ref:`Learn more `
.. raw:: html
@@ -629,11 +629,11 @@ the pandas syntax by looking for equivalents from the software you already know:
-
+
-
The SAS statistical software suite
- also provides the data set corresponding to the pandas data.frame.
- Also vectorized operations, filtering, string processing operations,... from SAS have similar
+
The SAS statistical software suite
+ also provides the data set corresponding to the pandas data.frame.
+ Also vectorized operations, filtering, string processing operations,... from SAS have similar
functions in pandas.
.. container:: custom-button
@@ -644,15 +644,15 @@ the pandas syntax by looking for equivalents from the software you already know:
-
-
+
+
Community tutorials
-------------------
-The community produces a wide variety of tutorials available online. Some of the
-material is enlisted in the community contributed :ref:`tutorials`.
+The community produces a wide variety of tutorials available online. Some of the
+material is listed in the community-contributed :ref:`tutorials`.
.. If you update this toctree, also update the manual toctree in the
diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
index bacc1b98e0bf9..208da04effdd5 100644
--- a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
@@ -42,15 +42,15 @@ I want to store passenger data of the Titanic. For a number of passengers, I kno
.. ipython:: python
df = pd.DataFrame({
- "Name": ["Braund, Mr. Owen Harris",
- "Allen, Mr. William Henry",
- "Bonnell, Miss. Elizabeth"],
+ "Name": ["Braund, Mr. Owen Harris",
+ "Allen, Mr. William Henry",
+ "Bonnell, Miss. Elizabeth"],
"Age": [22, 35, 58],
"Sex": ["male", "male", "female"]}
)
df
-To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and
+To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and
the values in each list as rows of the ``DataFrame``.
.. raw:: html
@@ -124,7 +124,7 @@ Do something with a DataFrame or Series
-
+
I want to know the maximum Age of the passengers
We can do this on the ``DataFrame`` by selecting the ``Age`` column and
@@ -177,7 +177,7 @@ pandas ``Series``.
.. raw:: html
- To user guide
+ To user guide
Check more options on ``describe`` in the user guide at :ref:`basics.describe`
@@ -194,7 +194,7 @@ Check more options on ``describe`` in the user guide at :ref:`basics.describe`
.. raw:: html
-
+
REMEMBER
- Import the package, aka ``import pandas as pd``
@@ -209,10 +209,10 @@ Check more options on ``describe`` in the user guide at :ref:`basics.describe`
.. raw:: html
- To user guide
+ To user guide
A more extended introduction to ``DataFrame`` and ``Series`` is provided in :ref:`dsintro`.
.. raw:: html
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst
index d8f5810fa5ea7..51995bd76ddef 100644
--- a/doc/source/getting_started/intro_tutorials/02_read_write.rst
+++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst
@@ -22,7 +22,7 @@
-
+
This tutorial uses the titanic data set, stored as CSV. The data
consists of the following data columns:
@@ -55,12 +55,12 @@ How do I read and write tabular data?
.. image:: ../../_static/schemas/02_io_readwrite.svg
:align: center
-
+
.. raw:: html
-
+
I want to analyse the titanic passenger data, available as a CSV file.
.. ipython:: python
@@ -133,7 +133,7 @@ strings (``object``).
.. raw:: html
-
+
My colleague requested the titanic data as a spreadsheet.
@@ -172,7 +172,7 @@ DataFrame:
.. raw:: html
-
+
I’m interested in a technical summary of a ``DataFrame``
@@ -207,7 +207,7 @@ The method ``info`` provides a lot of technical information about the
.. raw:: html
-
+
REMEMBER
- Getting data into pandas from many different file formats or data
@@ -224,10 +224,10 @@ The method ``info`` provides a lot of technical information about the
.. raw:: html
- To user guide
+ To user guide
For a complete overview of the input and output possibilities from and to pandas, see :ref:`io`
.. raw:: html
-
-
+
This tutorial uses the titanic data set, stored as CSV. The data
consists of the following data columns:
@@ -71,7 +71,7 @@ How do I select specific columns from a ``DataFrame``?
-
+
I’m interested in the age of the titanic passengers.
.. ipython:: python
@@ -110,7 +110,7 @@ the number of rows is returned.
-
+
I’m interested in the age and sex of the titanic passengers.
.. ipython:: python
@@ -148,13 +148,13 @@ The selection returned a ``DataFrame`` with 891 rows and 2 columns. Remember, a
.. raw:: html
- To user guide
+ To user guide
For basic information on indexing, see :ref:`indexing.basics`
.. raw:: html
-
+
How do I filter specific rows from a ``DataFrame``?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -166,7 +166,7 @@ How do I filter specific rows from a ``DataFrame``?
-
+
I’m interested in the passengers older than 35 years.
.. ipython:: python
@@ -175,7 +175,7 @@ I’m interested in the passengers older than 35 years.
above_35.head()
To select rows based on a conditional expression, use a condition inside
-the selection brackets ``[]``.
+the selection brackets ``[]``.
.. raw:: html
@@ -249,13 +249,13 @@ operator:
.. raw:: html
- To user guide
+ To user guide
Conditional (boolean) indexing, see :ref:`indexing.boolean`. Specific information on ``isin``, see :ref:`indexing.basics.indexing_isin`.
.. raw:: html
-
+
.. raw:: html
@@ -288,8 +288,8 @@ the same values. One way to verify is to check if the shape has changed:
.. raw:: html
- To user guide
-
+ To user guide
+
For more dedicated functions on missing values, see :ref:`missing-data`
.. raw:: html
@@ -366,17 +366,17 @@ the name ``anonymous`` to the first 3 elements of the third column:
.. raw:: html
- To user guide
+ To user guide
For more detailed description on selecting subsets of a data table, see :ref:`indexing.choice`
.. raw:: html
-
+
.. raw:: html
-
+
REMEMBER
- When selecting subsets of data, square brackets ``[]`` are used.
@@ -396,10 +396,10 @@ For more detailed description on selecting subsets of a data table, see :ref:`in
.. raw:: html
- To user guide
+ To user guide
Further details about indexing are provided in :ref:`indexing`
.. raw:: html
-
+
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
index 78e4047f4e0d0..e6b87bb16cf2e 100644
--- a/doc/source/getting_started/intro_tutorials/04_plotting.rst
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -8,7 +8,7 @@
import matplotlib.pyplot as plt
# use the pandas main colors
import matplotlib as mpl
- mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#150458", "#FFCA00", "#E70488"])
+ mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#150458", "#FFCA00", "#E70488"])
.. raw:: html
@@ -26,7 +26,7 @@
-
+
For this tutorial, air quality data about :math:`NO_2` is used, made
available by `openaq `__ and using the
`py-openaq `__ package.
@@ -43,7 +43,7 @@ in respectively Paris, Antwerp and London.
.. ipython:: python
- air_quality = pd.read_csv("data/air_quality_no2.csv",
+ air_quality = pd.read_csv("data/air_quality_no2.csv",
index_col=0, parse_dates=True)
air_quality.head()
@@ -94,7 +94,7 @@ I want to plot only the columns of the data table with the data from Paris.
@savefig 04_airqual_paris.png
air_quality["station_paris"].plot()
-To plot a specific column, use the selection method of the
+To plot a specific column, use the selection method of the
:ref:`subset data tutorial <10min_tut_03_subset>` in combination with the ``plot``
method. Hence, the ``plot`` method works on both ``Series`` and
``DataFrame``.
@@ -114,8 +114,8 @@ I want to visually compare the :math:`N0_2` values measured in London versus Par
.. ipython:: python
@savefig 04_airqual_scatter.png
- air_quality.plot.scatter(x="station_london",
- y="station_paris",
+ air_quality.plot.scatter(x="station_london",
+ y="station_paris",
alpha=0.5)
.. raw:: html
@@ -148,7 +148,7 @@ method is applicable on the air quality example data:
.. raw:: html
- To user guide
+ To user guide
For an introduction to the other plot methods, see :ref:`visualization.other`.
@@ -168,7 +168,7 @@ I want each of the columns in a separate subplot.
@savefig 04_airqual_area_subplot.png
axs = air_quality.plot.area(figsize=(12, 4), subplots=True)
-Separate subplots for each of the data columns is supported by the ``subplots`` argument
+Separate subplots for each of the data columns is supported by the ``subplots`` argument
of the plot functions. Each of the pandas plot functions provides a number of
builtin options that are worth a closer look.
@@ -180,7 +180,7 @@ functions that are worthwhile to have a look.
.. raw:: html
- To user guide
+ To user guide
Some more formatting options of the pandas plot functionalities are explained in :ref:`visualization.formatting`.
@@ -229,7 +229,7 @@ This strategy is applied in the previous example:
.. raw:: html
-
+
REMEMBER
- The ``.plot`` methods are applicable on both Series and DataFrames
@@ -244,7 +244,7 @@ This strategy is applied in the previous example:
.. raw:: html
- To user guide
+ To user guide
Further details about plotting are provided in :ref:`visualization`.
diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
index da49bdbaba511..b080dbd2f158f 100644
--- a/doc/source/getting_started/intro_tutorials/05_add_columns.rst
+++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
@@ -22,7 +22,7 @@
-
+
For this tutorial, air quality data about :math:`NO_2` is used, made
available by `openaq `__ and using the
`py-openaq `__ package.
@@ -39,7 +39,7 @@ in respectively Paris, Antwerp and London.
.. ipython:: python
- air_quality = pd.read_csv("data/air_quality_no2.csv",
+ air_quality = pd.read_csv("data/air_quality_no2.csv",
index_col=0, parse_dates=True)
air_quality.head()
@@ -99,7 +99,7 @@ I want to check the ratio of the values in Paris versus Antwerp and save the res
air_quality.head()
The calculation is again element-wise, so the ``/`` is applied *for the
-values in each row*.
+values in each row*.
.. raw:: html
@@ -120,7 +120,7 @@ I want to rename the data columns to the corresponding station identifiers used
.. ipython:: python
- air_quality_renamed = air_quality.rename(columns = {"station_antwerp": "BETR801",
+ air_quality_renamed = air_quality.rename(columns = {"station_antwerp": "BETR801",
"station_paris": "FR04014",
"station_london": "London Westminster"})
@@ -149,7 +149,7 @@ lowercase letters can be done using a function as well:
.. raw:: html
- To user guide
+ To user guide
Further details about column or row label renaming is provided in :ref:`basics.rename`.
@@ -159,7 +159,7 @@ Further details about column or row label renaming is provided in :ref:`basics.r
.. raw:: html
-
+
REMEMBER
- Create a new column by assigning the output to the DataFrame with a
@@ -176,7 +176,7 @@ Further details about column or row label renaming is provided in :ref:`basics.r
To user guide
-
+
Further details about column addition and deletion is provided in :ref:`basics.dataframe.sel_add_del`.
.. raw:: html
diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
index ad3e34f298164..7335560dd76f9 100644
--- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
@@ -22,7 +22,7 @@
-
+
This tutorial uses the titanic data set, stored as CSV. The data
consists of the following data columns:
@@ -107,10 +107,10 @@ return a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>
.. raw:: html
-
+
The aggregating statistic can be calculated for multiple columns at the
-same time. Remember the ``describe`` function from :ref:`first tutorial <10min_tut_01_tableoriented>` tutorial?
+same time. Remember the ``describe`` function from the :ref:`first tutorial <10min_tut_01_tableoriented>`?
.. ipython:: python
@@ -122,7 +122,7 @@ aggregating statistics for given columns can be defined using the
.. ipython:: python
- titanic.agg({'Age' : ['min', 'max', 'median', 'skew'],
+ titanic.agg({'Age' : ['min', 'max', 'median', 'skew'],
'Fare' : ['min', 'max', 'median', 'mean']})
.. raw:: html
@@ -252,7 +252,7 @@ What is the number of passengers in each of the cabin classes?
titanic["Pclass"].value_counts()
The ``value_counts`` function counts the number of records for each
-category in a column.
+category in a column.
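For instance, on a small hand-made column (not the full titanic data):

.. ipython:: python

    import pandas as pd

    pclass = pd.Series([3, 1, 3, 3, 2], name="Pclass")
    pclass.value_counts()  # number of records per category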
.. raw:: html
@@ -276,8 +276,8 @@ within each group:
.. raw:: html
- To user guide
-
+ To user guide
+
For more information about ``value_counts``, see :ref:`basics.discretization`.
.. raw:: html
@@ -286,7 +286,7 @@ For more information about ``value_counts``, see :ref:`basics.discretization`.
.. raw:: html
-
+
REMEMBER
- Aggregation statistics can be calculated on entire columns or rows
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
index 2d0d862940e9b..728b9179c3b3d 100644
--- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -22,7 +22,7 @@
-
+
This tutorial uses the titanic data set, stored as CSV. The data
consists of the following data columns:
@@ -101,7 +101,7 @@ measurement.
.. ipython:: python
- air_quality = pd.read_csv("data/air_quality_long.csv",
+ air_quality = pd.read_csv("data/air_quality_long.csv",
index_col="date.utc", parse_dates=True)
air_quality.head()
@@ -155,7 +155,7 @@ defined column(s). The index will follow the row order.
.. raw:: html
- To user guide
+ To user guide
More details about sorting of tables is provided in :ref:`basics.sorting`.
@@ -216,14 +216,14 @@ series at the same time:
no2.pivot(columns="location", values="value").plot()
.. note::
- When the ``index`` parameter is not defined, the existing
+ When the ``index`` parameter is not defined, the existing
index (row labels) is used.
.. raw:: html
- To user guide
-
+ To user guide
+
For more information about ``pivot``, see :ref:`reshaping.reshaping`.
.. raw:: html
@@ -245,7 +245,7 @@ I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of
.. ipython:: python
- air_quality.pivot_table(values="value", index="location",
+ air_quality.pivot_table(values="value", index="location",
columns="parameter", aggfunc="mean")
In the case of ``pivot``, the data is only rearranged. When multiple
@@ -264,14 +264,14 @@ the ``margin`` parameter to ``True``:
.. ipython:: python
- air_quality.pivot_table(values="value", index="location",
+ air_quality.pivot_table(values="value", index="location",
columns="parameter", aggfunc="mean",
margins=True)
.. raw:: html
- To user guide
+ To user guide
For more information about ``pivot_table``, see :ref:`reshaping.pivot`.
@@ -292,7 +292,7 @@ For more information about ``pivot_table``, see :ref:`reshaping.pivot`.
.. raw:: html
- To user guide
+ To user guide
Have a look at ``groupby`` in combination with ``unstack`` at :ref:`reshaping.combine_with_groupby`.
@@ -326,14 +326,14 @@ I want to collect all air quality :math:`NO_2` measurements in a single column (
no_2 = no2_pivoted.melt(id_vars="date.utc")
no_2.head()
-The ``melt`` method on a ``DataFrame`` converts the data table from wide
-format to long format. The column headers become the variable names in a
+The ``melt`` method on a ``DataFrame`` converts the data table from wide
+format to long format. The column headers become the variable names in a
newly created column.
.. raw:: html
-
+
The solution is the short version on how to apply ``melt``. The method
will *melt* all columns NOT mentioned in ``id_vars`` together into two
@@ -344,7 +344,7 @@ The ``melt`` method can be defined in more detail:
.. ipython:: python
- no_2 = no2_pivoted.melt(id_vars="date.utc",
+ no_2 = no2_pivoted.melt(id_vars="date.utc",
value_vars=["BETR801", "FR04014", "London Westminster"],
value_name="NO_2",
var_name="id_location")
@@ -366,7 +366,7 @@ are defined by ``id_vars`` and ``value_vars``.
.. raw:: html
- To user guide
+ To user guide
Conversion from wide to long format with ``melt`` is explained in :ref:`reshaping.melt`.
@@ -376,7 +376,7 @@ Conversion from wide to long format with ``melt`` is explained in :ref:`reshapin
.. raw:: html
-
+
REMEMBER
- Sorting by one or more columns is supported by ``sort_values``
@@ -392,7 +392,7 @@ Conversion from wide to long format with ``melt`` is explained in :ref:`reshapin
.. raw:: html
- To user guide
+ To user guide
More information on reshaping and pivoting is provided in :ref:`reshaping`.
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
index f262265adfef8..7572a082d0123 100644
--- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
@@ -22,7 +22,7 @@
-
+
For this tutorial, air quality data about :math:`NO_2` is used, made available by
`openaq `__ and downloaded using the
`py-openaq `__ package.
@@ -40,7 +40,7 @@ Westminster* in respectively Paris, Antwerp and London.
.. ipython:: python
- air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv",
+ air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv",
parse_dates=True)
air_quality_no2 = air_quality_no2[["date.utc", "location", "parameter", "value"]] # focus on a subset of columns
air_quality_no2.head()
@@ -74,12 +74,12 @@ Westminster* in respectively Paris, Antwerp and London.
.. ipython:: python
- air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv",
+ air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv",
parse_dates=True)
air_quality_pm25 = air_quality_pm25[["date.utc", "location", "parameter", "value"]] # focus on a subset of columns
air_quality_pm25.head()
-.. raw:: html
+.. raw:: html
@@ -108,7 +108,7 @@ I want to combine the measurements of :math:`NO_2` and :math:`PM_{25}`, two tabl
air_quality.head()
The ``concat`` function performs concatenation operations of multiple
-tables along one of the axis (row-wise or column-wise).
+tables along one of the axis (row-wise or column-wise).
.. raw:: html
@@ -173,7 +173,7 @@ index. For example:
.. raw:: html
- To user guide
+ To user guide
Feel free to dive into the world of multi-indexing at :ref:`advanced`.
@@ -184,7 +184,7 @@ index. For example:
.. raw:: html
- To user guide
+ To user guide
More options on table concatenation (row and column
wise) and how ``concat`` can be used to define the logic (union or
@@ -230,7 +230,7 @@ Add the station coordinates, provided by the stations metadata table, to the cor
.. ipython:: python
- air_quality = pd.merge(air_quality, stations_coord,
+ air_quality = pd.merge(air_quality, stations_coord,
how='left', on='location')
air_quality.head()
@@ -267,7 +267,7 @@ Add the parameter full description and name, provided by the parameters metadata
.. ipython:: python
- air_quality = pd.merge(air_quality, air_quality_parameters,
+ air_quality = pd.merge(air_quality, air_quality_parameters,
how='left', left_on='parameter', right_on='id')
air_quality.head()
@@ -286,9 +286,9 @@ between the two tables.
.. raw:: html
- To user guide
+ To user guide
-Pandas also supports inner, outer, and right joins.
+Pandas also supports inner, outer, and right joins.
More information on join/merge of tables is provided in
:ref:`merging.join`. Or have a look to the
:ref:`comparison with SQL`.
@@ -299,7 +299,7 @@ More information on join/merge of tables is provided in
.. raw:: html
-
+
REMEMBER
- Multiple tables can be concatenated both column-wise and row-wise using
@@ -314,7 +314,7 @@ More information on join/merge of tables is provided in
.. raw:: html
- To user guide
+ To user guide
The user guide provides more information on combining together data tables, see :ref:`merging`.
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
index c1b2378c7de83..3f32d3dfd0c60 100644
--- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -23,7 +23,7 @@
-
+
For this tutorial, air quality data about :math:`NO_2` and Particulate
matter less than 2.5 micrometers is used, made available by
`openaq `__ and downloaded using the
@@ -120,7 +120,7 @@ from the standard Python library and defining a time duration.
.. raw:: html
- To user guide
+ To user guide
The main time related concepts of pandas are explained in :ref:`timeseries.overview`.
@@ -131,7 +131,7 @@ The main time related concepts of pandas are explained in :ref:`timeseries.overv
.. raw:: html
-
+
I want to add a new column to the dataframe containing only the month of the measurement
@@ -211,7 +211,7 @@ Plot the typical :math:`NO_2` pattern during the day of our time series of all s
Similar to the previous case, we want to calculate a given statistic
(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
-split-apply-combine approach again. For this case, the datetime property ``hour``
+split-apply-combine approach again. For this case, the datetime property ``hour``
of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor.
.. raw:: html
@@ -222,7 +222,7 @@ of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor.
Datetime as index
~~~~~~~~~~~~~~~~~
-In the :ref:`tutorial on reshaping <10min_tut_07_reshape>`,
+In the :ref:`tutorial on reshaping <10min_tut_07_reshape>`,
``pivot`` was introduced to reshape the data table with each of the
measurements locations as a separate column:
@@ -295,7 +295,7 @@ Aggregate the current hourly time series values to the monthly maximum value in
A very powerful method on time series data with a datetime index, is the
ability to ``resample`` time series to another frequency (e.g.,
-converting secondly data into 5-minutely data).
+converting secondly data into 5-minutely data).
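A minimal sketch with a made-up hourly series, aggregated to a daily maximum.

.. ipython:: python

    import pandas as pd

    hourly = pd.Series([20, 25, 30, 18, 22, 27],
                       index=pd.date_range("2019-05-07", periods=6, freq="H"))
    hourly.resample("D").max()  # hourly values -> daily maximum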
.. raw:: html
@@ -346,7 +346,7 @@ Make a plot of the daily median :math:`NO_2` value in each of the stations.
To user guide
-
+
More details on the power of time series ``resampling`` is provided in :ref:`timeseries.resampling`.
.. raw:: html
@@ -355,7 +355,7 @@ More details on the power of time series ``resampling`` is provided in :ref:`tim
.. raw:: html
-
+
REMEMBER
- Valid date strings can be converted to datetime objects using
@@ -374,7 +374,7 @@ More details on the power of time series ``resampling`` is provided in :ref:`tim
.. raw:: html
- To user guide
+ To user guide
More information on time series and date functionalities is given in :ref:`timeseries`.
diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst
index cb26b1252eed6..2f1a0fd34f6c3 100644
--- a/doc/source/getting_started/intro_tutorials/10_text_data.rst
+++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst
@@ -22,7 +22,7 @@
-
+
This tutorial uses the titanic data set, stored as CSV. The data
consists of the following data columns:
@@ -72,7 +72,7 @@ Make all name characters lowercase
titanic["Name"].str.lower()
-To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
+To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
apply the ``lower`` method. As such, each of the strings is converted element wise.
@@ -81,12 +81,12 @@ apply the ``lower`` method. As such, each of the strings is converted element wi
-Similar to datetime objects in the :ref:`time series tutorial <10min_tut_09_timeseries>`
+Similar to datetime objects in the :ref:`time series tutorial <10min_tut_09_timeseries>`
having a ``dt`` accessor, a number of
specialized string methods are available when using the ``str``
accessor. These methods have in general matching names with the
equivalent built-in string methods for single elements, but are applied
-element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
+element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
on each of the values of the columns.
.. raw:: html
@@ -122,7 +122,7 @@ concatenated to combine multiple functions at once!
.. raw:: html
- To user guide
+ To user guide
More information on extracting parts of strings is available in :ref:`text.split`.
@@ -169,7 +169,7 @@ only 1 Countess on the Titanic, we get one row as a result.
.. raw:: html
- To user guide
+ To user guide
More information on extracting parts of strings is available in :ref:`text.extract`.
@@ -224,7 +224,7 @@ In the ‘Sex’ columns, replace values of ‘male’ by ‘M’ and all ‘fem
.. ipython:: python
- titanic["Sex_short"] = titanic["Sex"].replace({"male": "M",
+ titanic["Sex_short"] = titanic["Sex"].replace({"male": "M",
"female": "F"})
titanic["Sex_short"]
@@ -253,7 +253,7 @@ a ``dictionary`` to define the mapping ``{from : to}``.
.. raw:: html
-
+
REMEMBER
- String methods are available using the ``str`` accessor.
@@ -269,7 +269,7 @@ a ``dictionary`` to define the mapping ``{from : to}``.
.. raw:: html
- To user guide
+ To user guide
More information on string methods is given in :ref:`text`.
From 67639ff72e65ad1f1c86eefefd5a439dc6f3f40f Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 20:42:15 +0100
Subject: [PATCH 35/47] Fix linting errors
---
.../intro_tutorials/07_reshape_table_layout.rst | 4 +++-
.../intro_tutorials/08_combine_dataframes.rst | 9 ++++++---
.../intro_tutorials/09_timeseries.rst | 14 +++++++++-----
3 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
index 728b9179c3b3d..54a374d547f20 100644
--- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -345,7 +345,9 @@ The ``melt`` method can be defined in more detail:
.. ipython:: python
no_2 = no2_pivoted.melt(id_vars="date.utc",
- value_vars=["BETR801", "FR04014", "London Westminster"],
+ value_vars=["BETR801",
+ "FR04014",
+ "London Westminster"],
value_name="NO_2",
var_name="id_location")
no_2.head()
diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
index 7572a082d0123..31bddb4a958d5 100644
--- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
+++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst
@@ -42,7 +42,8 @@ Westminster* in respectively Paris, Antwerp and London.
air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv",
parse_dates=True)
- air_quality_no2 = air_quality_no2[["date.utc", "location", "parameter", "value"]] # focus on a subset of columns
+ air_quality_no2 = air_quality_no2[["date.utc", "location",
+ "parameter", "value"]]
air_quality_no2.head()
.. raw:: html
@@ -76,7 +77,8 @@ Westminster* in respectively Paris, Antwerp and London.
air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv",
parse_dates=True)
- air_quality_pm25 = air_quality_pm25[["date.utc", "location", "parameter", "value"]] # focus on a subset of columns
+ air_quality_pm25 = air_quality_pm25[["date.utc", "location",
+ "parameter", "value"]]
air_quality_pm25.head()
.. raw:: html
@@ -153,7 +155,8 @@ index. For example:
.. ipython:: python
- air_quality_ = pd.concat([air_quality_pm25, air_quality_no2], keys=["PM25", "NO2"])
+ air_quality_ = pd.concat([air_quality_pm25, air_quality_no2],
+ keys=["PM25", "NO2"])
.. ipython:: python
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
index 3f32d3dfd0c60..efed17a473e18 100644
--- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -173,7 +173,8 @@ What is the average :math:`NO_2` concentration for each day of the week for each
.. ipython:: python
- air_quality.groupby([air_quality["datetime"].dt.weekday, "location"])["value"].mean()
+ air_quality.groupby(
+ [air_quality["datetime"].dt.weekday, "location"])["value"].mean()
Remember the split-apply-combine pattern provided by ``groupby`` from the
:ref:`tutorial on statistics calculation <10min_tut_06_stats>`?
@@ -204,10 +205,13 @@ Plot the typical :math:`NO_2` pattern during the day of our time series of all s
.. ipython:: python
fig, axs = plt.subplots(figsize=(12, 4))
- air_quality.groupby(air_quality["datetime"].dt.hour)["value"].mean().plot(kind='bar', rot=0, ax=axs)
- plt.xlabel("Hour of the day"); # custom label for the x-axis using matplotlib
+ air_quality.groupby(
+ air_quality["datetime"].dt.hour)["value"].mean().plot(kind='bar',
+ rot=0,
+ ax=axs)
+ plt.xlabel("Hour of the day"); # custom x label using matplotlib
@savefig 09_bar_chart.png
- plt.ylabel("$NO_2\ (µg/m^3)$");
+ plt.ylabel("$NO_2\;(µg/m^3)$");
Similar to the previous case, we want to calculate a given statistic
(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
@@ -258,7 +262,7 @@ Create a plot of the :math:`NO_2` values in the different stations from the 20th
.. ipython:: python
@savefig 09_time_section.png
- no_2["2019-05-20" : "2019-05-21"].plot();
+ no_2["2019-05-20":"2019-05-21"].plot();
By providing a **string that parses to a datetime**, a specific subset of the data can be selected on a ``DatetimeIndex``.
From 19ba8105aa0ba4c8271ec197d854fd9222ce9989 Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 21:06:31 +0100
Subject: [PATCH 36/47] Fix linting issues of tutorials
---
.../intro_tutorials/01_table_oriented.rst | 8 ++++----
.../getting_started/intro_tutorials/04_plotting.rst | 8 ++++++--
.../intro_tutorials/05_add_columns.rst | 12 +++++++-----
.../intro_tutorials/06_calculate_statistics.rst | 4 ++--
.../intro_tutorials/07_reshape_table_layout.rst | 5 +++--
.../intro_tutorials/09_timeseries.rst | 2 +-
6 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
index 208da04effdd5..db13b6142a340 100644
--- a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst
@@ -43,11 +43,11 @@ I want to store passenger data of the Titanic. For a number of passengers, I kno
df = pd.DataFrame({
"Name": ["Braund, Mr. Owen Harris",
- "Allen, Mr. William Henry",
- "Bonnell, Miss. Elizabeth"],
+ "Allen, Mr. William Henry",
+ "Bonnell, Miss. Elizabeth"],
"Age": [22, 35, 58],
"Sex": ["male", "male", "female"]}
- )
+ )
df
To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and
@@ -111,7 +111,7 @@ You can create a ``Series`` from scratch as well:
.. ipython:: python
- ages = pd.Series([22, 35, 58], name = "Age")
+ ages = pd.Series([22, 35, 58], name="Age")
ages
A pandas ``Series`` has no column labels, as it is just a single column
diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst
index e6b87bb16cf2e..3f346d218cd60 100644
--- a/doc/source/getting_started/intro_tutorials/04_plotting.rst
+++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst
@@ -8,7 +8,9 @@
import matplotlib.pyplot as plt
# use the pandas main colors
import matplotlib as mpl
- mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#150458", "#FFCA00", "#E70488"])
+ mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#150458",
+ "#FFCA00",
+ "#E70488"])
.. raw:: html
@@ -129,7 +131,9 @@ standard Python to get an overview of the available plot methods:
.. ipython:: python
- [method_name for method_name in dir(air_quality.plot) if not method_name.startswith("_") ]
+ [method_name for method_name in dir(air_quality.plot) \
+ if not method_name.startswith("_")
+ ]
.. note::
In many development environments as well as ipython and
diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
index b080dbd2f158f..042faf309b812 100644
--- a/doc/source/getting_started/intro_tutorials/05_add_columns.rst
+++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst
@@ -95,7 +95,8 @@ I want to check the ratio of the values in Paris versus Antwerp and save the res
.. ipython:: python
- air_quality["ratio_paris_antwerp"] = air_quality["station_paris"] / air_quality["station_antwerp"]
+ air_quality["ratio_paris_antwerp"] = \
+ air_quality["station_paris"] / air_quality["station_antwerp"]
air_quality.head()
The calculation is again element-wise, so the ``/`` is applied *for the
@@ -120,9 +121,10 @@ I want to rename the data columns to the corresponding station identifiers used
.. ipython:: python
- air_quality_renamed = air_quality.rename(columns = {"station_antwerp": "BETR801",
- "station_paris": "FR04014",
- "station_london": "London Westminster"})
+ air_quality_renamed = air_quality.rename(
+ columns={"station_antwerp": "BETR801",
+ "station_paris": "FR04014",
+ "station_london": "London Westminster"})
.. ipython:: python
@@ -143,7 +145,7 @@ lowercase letters can be done using a function as well:
.. ipython:: python
- air_quality_renamed = air_quality_renamed.rename(columns = str.lower)
+ air_quality_renamed = air_quality_renamed.rename(columns=str.lower)
air_quality_renamed.head()
.. raw:: html
diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
index 7335560dd76f9..a95728e5c84d2 100644
--- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
+++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst
@@ -122,8 +122,8 @@ aggregating statistics for given columns can be defined using the
.. ipython:: python
- titanic.agg({'Age' : ['min', 'max', 'median', 'skew'],
- 'Fare' : ['min', 'max', 'median', 'mean']})
+ titanic.agg({'Age': ['min', 'max', 'median', 'skew'],
+ 'Fare': ['min', 'max', 'median', 'mean']})
.. raw:: html
diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
index 54a374d547f20..cd31e19b3c68d 100644
--- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
+++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst
@@ -173,11 +173,12 @@ called ``no2_subset``
.. ipython:: python
- no2 = air_quality[air_quality["parameter"] == "no2"] # filter for no2 data only
+ no2 = air_quality[air_quality["parameter"] == "no2"] # filter for no2 data only
.. ipython:: python
- no2_subset = no2.sort_index().groupby(["location"]).head(2) # use 2 measurements (head) for each location (groupby)
+ # use 2 measurements (head) for each location (groupby)
+ no2_subset = no2.sort_index().groupby(["location"]).head(2)
no2_subset
.. image:: ../../_static/schemas/07_pivot.svg
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
index efed17a473e18..9ca9c3cc49296 100644
--- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -211,7 +211,7 @@ Plot the typical :math:`NO_2` pattern during the day of our time series of all s
ax=axs)
plt.xlabel("Hour of the day"); # custom x label using matplotlib
@savefig 09_bar_chart.png
- plt.ylabel("$NO_2\;(µg/m^3)$");
+ plt.ylabel("$NO_2 (µg/m^3)$");
Similar to the previous case, we want to calculate a given statistic
(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the
From e1aab581d0564a097446386a37cc1e678afb93ec Mon Sep 17 00:00:00 2001
From: stijnvanhoey
Date: Mon, 20 Jan 2020 21:09:18 +0100
Subject: [PATCH 37/47] Add schemas
---
.../_static/schemas/01_table_dataframe.svg | 262 +
.../_static/schemas/01_table_series.svg | 127 +
.../_static/schemas/01_table_spreadsheet.png | Bin 0 -> 46286 bytes
.../_static/schemas/02_io_readwrite.svg | 1401 ++++
.../_static/schemas/03_subset_columns.svg | 327 +
.../schemas/03_subset_columns_rows.svg | 272 +
doc/source/_static/schemas/03_subset_rows.svg | 316 +
.../_static/schemas/04_plot_overview.svg | 6443 +++++++++++++++++
doc/source/_static/schemas/05_newcolumn_1.svg | 347 +
doc/source/_static/schemas/05_newcolumn_2.svg | 347 +
doc/source/_static/schemas/05_newcolumn_3.svg | 352 +
doc/source/_static/schemas/06_aggregate.svg | 211 +
doc/source/_static/schemas/06_groupby.svg | 307 +
.../_static/schemas/06_groupby_agg_detail.svg | 619 ++
.../schemas/06_groupby_select_detail.svg | 697 ++
doc/source/_static/schemas/06_reduction.svg | 222 +
doc/source/_static/schemas/06_valuecounts.svg | 269 +
doc/source/_static/schemas/07_melt.svg | 315 +
doc/source/_static/schemas/07_pivot.svg | 338 +
doc/source/_static/schemas/07_pivot_table.svg | 455 ++
.../_static/schemas/08_concat_column.svg | 465 ++
doc/source/_static/schemas/08_concat_row.svg | 392 +
doc/source/_static/schemas/08_merge_left.svg | 608 ++
23 files changed, 15092 insertions(+)
create mode 100644 doc/source/_static/schemas/01_table_dataframe.svg
create mode 100644 doc/source/_static/schemas/01_table_series.svg
create mode 100644 doc/source/_static/schemas/01_table_spreadsheet.png
create mode 100644 doc/source/_static/schemas/02_io_readwrite.svg
create mode 100644 doc/source/_static/schemas/03_subset_columns.svg
create mode 100644 doc/source/_static/schemas/03_subset_columns_rows.svg
create mode 100644 doc/source/_static/schemas/03_subset_rows.svg
create mode 100644 doc/source/_static/schemas/04_plot_overview.svg
create mode 100644 doc/source/_static/schemas/05_newcolumn_1.svg
create mode 100644 doc/source/_static/schemas/05_newcolumn_2.svg
create mode 100644 doc/source/_static/schemas/05_newcolumn_3.svg
create mode 100644 doc/source/_static/schemas/06_aggregate.svg
create mode 100644 doc/source/_static/schemas/06_groupby.svg
create mode 100644 doc/source/_static/schemas/06_groupby_agg_detail.svg
create mode 100644 doc/source/_static/schemas/06_groupby_select_detail.svg
create mode 100644 doc/source/_static/schemas/06_reduction.svg
create mode 100644 doc/source/_static/schemas/06_valuecounts.svg
create mode 100644 doc/source/_static/schemas/07_melt.svg
create mode 100644 doc/source/_static/schemas/07_pivot.svg
create mode 100644 doc/source/_static/schemas/07_pivot_table.svg
create mode 100644 doc/source/_static/schemas/08_concat_column.svg
create mode 100644 doc/source/_static/schemas/08_concat_row.svg
create mode 100644 doc/source/_static/schemas/08_merge_left.svg
diff --git a/doc/source/_static/schemas/01_table_dataframe.svg b/doc/source/_static/schemas/01_table_dataframe.svg
new file mode 100644
index 0000000000000..9bd1c217b3ca2
--- /dev/null
+++ b/doc/source/_static/schemas/01_table_dataframe.svg
@@ -0,0 +1,262 @@
+
+
+
+
diff --git a/doc/source/_static/schemas/01_table_series.svg b/doc/source/_static/schemas/01_table_series.svg
new file mode 100644
index 0000000000000..d52c882f26868
--- /dev/null
+++ b/doc/source/_static/schemas/01_table_series.svg
@@ -0,0 +1,127 @@
+
+
+
+
diff --git a/doc/source/_static/schemas/01_table_spreadsheet.png b/doc/source/_static/schemas/01_table_spreadsheet.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3cf5a0245b9cc955106b95bb718c039fa1dac74
GIT binary patch
literal 46286
zfQXWeRO6mOlo%QZ9T^>UL#RldTv^9>dSc=`H246crRAw_IckKIBWg{}jF){Machn3
z9X$RQ>2}E%?s^@m%yzvi>Mjy8c?h6-S(~3i_meGI8?m@8uJJBPahKxH>KegXhnD;P
z%~FLW`%*Xe^$`&7HlS!swO$c7-_s8XvlCPbLJg+u;t;o?4EzewSSoU#47A{@EAa
z_+_Smf4RMt;XU^9@}6t<;7z5)-H;Fwc{hJU>in_Z1Jm{3zMul)+C9<|Dsq#{qqV{|
zgIwl$E~~^uK6}cDIh6Jipga5$KuO4WRVj?p1UomQz%XP-zv)})xVBhrs;AVwt#QSZmU)re5o7#N1bQDuCaO_nJy
z3*_O(kGFI+9L5`_)&y~aBogJ$|#=N@fjD}n$2MTbK(@*M01Z`-zn_fL*mQqG4h}g5>
zXnV@CE)**lMo2p>syP$V?cv2*`13~ulSF*3L;xvFckvv0H6UZ(0t`VAgR
zbm`*{s_#0R?n?FZ>*E(c{aAv+Hg_Gx1oWZZ;~q&bG*na`$K@HJ-*H5ecob4nQgn1x
zQWN9w2nZ`xy3gd-(Z7E)Nh(tB{|HBB9TNe<&(NLU51dIc?49h&o!QCAG^i6?&XDUo
zZ9VA5{*>r;%g}%Id+XOOj3|%KryTE}lq!GNEU`#)cXuBi9v&y0Fsjeb&mSL`+NW#N
zwlOgoHfcy=URYRYYEH4Z5hQD~wGAG@LpB}Zb_>$odbA8Y`BSP`aMIpnIzvFmS}5T&
zUqkZY{Cy-mWJ>JgJ-M>Bb~bQ$c8wSSAwruMiroXZ7Ya?U%LnndM!hI|Hx
z*T~*7OY?RXE|ASBndlF;T7_GC@+L+o8Q)^NZH|KawzG74%5J#1xlul{m(45R`pLL8
z_{VJUN}q?IMi7Tu>&w0#gy0G9Qng|^nYeqMb|C-=(>Iu;4i73_HV7z^s-;LHL&WT&
zi{f*7l(rU{^|yvYL3P6HlG|*z-S_r+3qv6(n=?@P{W;Va57azaLVvB|nE++-mmAZa
zi;=(LjU@5jS%3_!c$-Wg##0!#_hS=x1KrtCc}kC&?XVpraDV;I0!U1!lpOq|Ea>QQ
z8am8VM6Y8r`g3FV+05%Ll;0E?MpUk(sQO?&MfTDSJ+ZBn*f#0D7c$9mZCt|XK`I%i
zm(e(UYc6vgnOPYXnUIt`esi0&Ux|h(CP{kVYGWX2Y%JO)fQ*fP^|!A8Q=d46K?hB`
z&>%c&EM7%bHEa;I;H7;+mPDkfjRaayR71sR@g7(08}`8TMFYN+v~(4+s+dazEdzt#
z$p<=~Fi40p0;sTXV)+{V6baZrJ
zegpsBvZS{ta}^>~RQCMp6szRvSEQRnW#r}cy1h69`_#KUn*99*zc&)YZOI%uMDuv`
zczO(L!l9-ynq8lvGc`%47F+Gl50mE&7N=;byq5?sT#jSk${5$~rT_bWrE
z-?>Dm9*ozlx9BaGGigx&U2dZ}9xjue?4Pa-6_F{E%l@kWWB+&|i_M;VZ;lU&n@zP^
ztlTeliZn|*
zWA1m3#zEsdP4eEKg{r%1XWRE%!AUp+B;vb)bq9W`75xR;~Nk7
z5qVgzDX`m=hNtIK{+7@tt~5G5+KB0h-5s3Da0Nt3NVtMRAixEJYn4b*X$%z$BzJ=s
zowG_xa_*THmWSD>>u;A%)sP|T>g%EWCS$)D?~|cqLS
z9+pdo$n*;RRtCE*xPYBqZFY7xIjTx%-e~_|POAsc2xd~L@sx|@(CZ}i=rcPlIuH9_6p
z))SC|&;x(vGSl%|Q9Px^)z}$(_2^T!eL?y^5Nf^NsHmD(hDPD$;f4%hf3{Z^5dlk|
zQqdN|iW{VP9$0Uvb6V|UtRh``le<<@>0ZZFxA;m69ZV8W8Z3k&h%7lKDJUq&?Rq{r
zIVqw@%EIEfa-jptIm_qpo#Pbr^WNJtZZI>c7}K&2`OwVNj~m3|s>zF?0DK2g0Gu8LZ0a7$|@h
zH}~Wpb#JWLF{7|S1E`R94r7B&-GbCHqaP9y=D}}Q(UV^uCy+MeuUwe%P|fvf;@ZvT
z@{KpLw8X^3V1s$M{-+3x@Cjo^MPy`ITw+&n4*L$z38ph#&PRn?4TFql!}qw*0YrY_
z-~xh&^jTm6{=3|+wpFfO@>5nn*x1PVt5D0k$uJ|6e4U-0dzyZ_dAmQqc6dT-A7Ia7
z0914|4fRd(-OU`&YX*sZ4%Y8ozB8gY5vb4*FfcH{Y@TrFaB1SB2WBXO95GG81^k3G
zCn5kjmeGwTFMdx)B8%6(!eYCvK%q_ss*0pOWs{7T@-#PJ9h}I@rr1xbk4}i+YKCsQ
zxihn|H4W8=4;Jb(V#bGumi&qodQhF6&8Ie<{loKhxl1FfpiimnPVntJ-pGhtW+h%|
ztNUq9vvs+uN@{8<;5Yv#1@7cWGWF_fK#S{6c)ImTN(t0!cjtOXOETRIC@ipy_zy~$
z_%U4i(1?(rh$@o&h6YH!lU#21oSvTQDWt{{m1YyH^z{*QQc-c0I~oqM)zwuJI4Q~b
zFY-g(k2T!=DamIhi>AceL3Y151zcS8WVn{^Ww~z>>QqqikTq9&e@kVt
z#uSY!to~8|LPYd~JMnrjC~teaUT3K_?rwMRQM?P46A1@JT1TV2F_BafTr??@FRR5Q
zy1J(3It+zWQYtb6I>O-fg?inIHQ2z)YUlXh@&3Px;z1r>p6F=VMKdL73H@-ho8bhe
ze>MfLA>qame@Zmq9?8YpmcAEOpK@`v6I|#b?d}d}1|?x)fxH5b$6IN3w{C1TZ)D^R
znbL9KlN+I}9zMPe$Wow*NzI@N*H!Cu7*K}fQwHRsi~c4B16qZcT<#8IgsSfOmYD7F
zM(nj$S4EITKm}sd-RWL$PuHhu4Vq~F&a&Sum?K_4wz9#L*;yE&BzH*jh+u-dL|=Pn
z+0gUiq>I#IYOXKJk7>oUwKE;+;6Stl2;s4#k&m9l>_g1;W
zd3i9ZciisMdfn8ucx8_a4FT@&H?q6>=g(r3moRxkT54*i?`Jot%s^HhcI?>w{EW_y
z>ff&+CT8ZvrKKXu`p!<7-#L|!PSO;gY?n`a`*%N*>DBs&&{
zNn6FR}kWdg{0{xMu&;fRB8>3-IpkI4Xrxe4w
zCks9xCdT#5`E7};S+H3ljnY`tf}rAxw!$(mJ^0I&`7{Wl!G>BYUw{c5PW@Z@iZ1#M
z8ynt+&BFJ_C5eA#db)brj8)H<3>uV~P(SeY@^v^AYO8!urR{5u`<)%ZguONrkN_&J
zBh3>4qr{&LXKH$CI1==|B?1CSARj06*25LIaODi=^NW4I0{O0&H$I