Skip to content

Commit 8e4dfff

Browse files
committed
Merge remote-tracking branch 'upstream/master' into asnullabletype
2 parents 2efb8ea + bbcda98 commit 8e4dfff

File tree

135 files changed

+2449
-1630
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+2449
-1630
lines changed

.devcontainer.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at
2+
// https://github.com/microsoft/vscode-dev-containers/tree/master/containers/python-3-miniconda
3+
{
4+
"name": "pandas",
5+
"context": ".",
6+
"dockerFile": "Dockerfile",
7+
8+
// Use 'settings' to set *default* container specific settings.json values on container create.
9+
// You can edit these settings after create using File > Preferences > Settings > Remote.
10+
"settings": {
11+
"terminal.integrated.shell.linux": "/bin/bash",
12+
"python.condaPath": "/opt/conda/bin/conda",
13+
"python.pythonPath": "/opt/conda/bin/python",
14+
"python.formatting.provider": "black",
15+
"python.linting.enabled": true,
16+
"python.linting.flake8Enabled": true,
17+
"python.linting.pylintEnabled": false,
18+
"python.linting.mypyEnabled": true,
19+
"python.testing.pytestEnabled": true,
20+
"python.testing.cwd": "pandas/tests"
21+
},
22+
23+
// Add the IDs of extensions you want installed when the container is created in the array below.
24+
"extensions": [
25+
"ms-python.python",
26+
"ms-vscode.cpptools"
27+
]
28+
}

.travis.yml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ python: 3.7
77
# travis cache --delete inside the project directory from the travis command line client
88
# The cache directories will be deleted if anything in ci/ changes in a commit
99
cache:
10-
ccache: true
11-
directories:
12-
- $HOME/.cache # cython cache
13-
- $HOME/.ccache # compiler cache
10+
ccache: true
11+
directories:
12+
- $HOME/.cache # cython cache
13+
- $HOME/.ccache # compiler cache
1414

1515
env:
1616
global:
@@ -20,13 +20,13 @@ env:
2020
- secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="
2121

2222
git:
23-
# for cloning
24-
depth: false
23+
# for cloning
24+
depth: false
2525

2626
matrix:
27-
fast_finish: true
27+
fast_finish: true
2828

29-
include:
29+
include:
3030
- env:
3131
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
3232

@@ -40,6 +40,9 @@ matrix:
4040
- postgresql
4141

4242
- env:
43+
# Enabling Deprecations when running tests
44+
# PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
45+
# See pandas/_testing.py for more details.
4346
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
4447
services:
4548
- mysql
@@ -70,7 +73,6 @@ before_install:
7073
# This overrides travis and tells it to look nowhere.
7174
- export BOTO_CONFIG=/dev/null
7275

73-
7476
install:
7577
- echo "install start"
7678
- ci/prep_cython_cache.sh
@@ -87,5 +89,5 @@ script:
8789
after_script:
8890
- echo "after_script start"
8991
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
90-
- ci/print_skipped.py
92+
- ci/print_skipped.py
9193
- echo "after_script done"

Dockerfile

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
FROM continuumio/miniconda3
2+
3+
# if you forked pandas, you can pass in your own GitHub username to use your fork
4+
# i.e. gh_username=myname
5+
ARG gh_username=pandas-dev
6+
ARG pandas_home="/home/pandas"
7+
8+
# Avoid warnings by switching to noninteractive
9+
ENV DEBIAN_FRONTEND=noninteractive
10+
11+
# Configure apt and install packages
12+
RUN apt-get update \
13+
&& apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
14+
#
15+
# Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
16+
&& apt-get -y install git iproute2 procps iproute2 lsb-release \
17+
#
18+
# Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
19+
# needed to build pandas C extensions
20+
&& apt-get -y install build-essential \
21+
#
22+
# cleanup
23+
&& apt-get autoremove -y \
24+
&& apt-get clean -y \
25+
&& rm -rf /var/lib/apt/lists/*
26+
27+
# Switch back to dialog for any ad-hoc use of apt-get
28+
ENV DEBIAN_FRONTEND=dialog
29+
30+
# Clone pandas repo
31+
RUN mkdir "$pandas_home" \
32+
&& git clone "https://github.com/$gh_username/pandas.git" "$pandas_home" \
33+
&& cd "$pandas_home" \
34+
&& git remote add upstream "https://github.com/pandas-dev/pandas.git" \
35+
&& git pull upstream master
36+
37+
# Because it is surprisingly difficult to activate a conda environment inside a Dockerfile
38+
# (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89),
39+
# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
40+
#
41+
# Set up environment
42+
RUN conda env update -n base -f "$pandas_home/environment.yml"
43+
44+
# Build C extensions and pandas
45+
RUN cd "$pandas_home" \
46+
&& python setup.py build_ext --inplace -j 4 \
47+
&& python -m pip install -e .

LICENSE

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
BSD 3-Clause License
22

3-
Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
3+
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
44
All rights reserved.
55

6+
Copyright (c) 2011-2020, Open source contributors.
7+
68
Redistribution and use in source and binary forms, with or without
79
modification, are permitted provided that the following conditions are met:
810

asv_bench/benchmarks/reshape.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ def time_pivot_table_categorical_observed(self):
161161
observed=True,
162162
)
163163

164+
def time_pivot_table_margins_only_column(self):
165+
self.df.pivot_table(columns=["key2", "key3"], margins=True)
166+
164167

165168
class Crosstab:
166169
def setup(self):

ci/deps/azure-37-locale.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,6 @@ dependencies:
3434
- xlsxwriter
3535
- xlwt
3636
- pyarrow>=0.15
37+
- pip
38+
- pip:
39+
- pyxlsb

ci/deps/azure-macos-36.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ dependencies:
3333
- pip
3434
- pip:
3535
- pyreadstat
36+
- pyxlsb

ci/deps/azure-windows-37.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,6 @@ dependencies:
3535
- xlsxwriter
3636
- xlwt
3737
- pyreadstat
38+
- pip
39+
- pip:
40+
- pyxlsb

ci/deps/travis-36-cov.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,4 @@ dependencies:
5151
- coverage
5252
- pandas-datareader
5353
- python-dateutil
54+
- pyxlsb

ci/print_skipped.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
import os
33
import xml.etree.ElementTree as et
44

doc/make.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
"""
33
Python script for building documentation.
44

doc/source/development/contributing.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,17 @@ requires a C compiler and Python environment. If you're making documentation
146146
changes, you can skip to :ref:`contributing.documentation` but you won't be able
147147
to build the documentation locally before pushing your changes.
148148

149+
Using a Docker Container
150+
~~~~~~~~~~~~~~~~~~~~~~~~
151+
152+
Instead of manually setting up a development environment, you can use Docker to
153+
automatically create the environment with just a few commands. Pandas provides a `Dockerfile`
154+
in the root directory to build a Docker image with a full pandas development environment.
155+
156+
Even easier, you can use the Dockerfile to launch a remote session with Visual Studio Code,
157+
a popular free IDE, using the `.devcontainer.json` file.
158+
See https://code.visualstudio.com/docs/remote/containers for details.
159+
149160
.. _contributing.dev_c:
150161

151162
Installing a C compiler
@@ -1525,3 +1536,19 @@ The branch will still exist on GitHub, so to delete it there do::
15251536
git push origin --delete shiny-new-feature
15261537

15271538
.. _Gitter: https://gitter.im/pydata/pandas
1539+
1540+
1541+
Tips for a successful Pull Request
1542+
==================================
1543+
1544+
If you have made it to the `Review your code`_ phase, one of the core contributors may
1545+
take a look. Please note however that a handful of people are responsible for reviewing
1546+
all of the contributions, which can often lead to bottlenecks.
1547+
1548+
To improve the chances of your pull request being reviewed, you should:
1549+
1550+
- **Reference an open issue** for non-trivial changes to clarify the PR's purpose
1551+
- **Ensure you have appropriate tests**. These should be the first part of any PR
1552+
- **Keep your pull requests as simple as possible**. Larger PRs take longer to review
1553+
- **Ensure that CI is in a green state**. Reviewers may not even look otherwise
1554+
- **Keep** `Updating your pull request`_, either by request or every few days

doc/source/ecosystem.rst

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,14 @@ also goes beyond matplotlib and pandas with the option to perform statistical
122122
estimation while plotting, aggregating across observations and visualizing the
123123
fit of statistical models to emphasize patterns in a dataset.
124124

125-
`yhat/ggpy <https://github.com/yhat/ggpy>`__
126-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
125+
`plotnine <https://github.com/has2k1/plotnine/>`__
126+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
127127

128128
Hadley Wickham's `ggplot2 <https://ggplot2.tidyverse.org/>`__ is a foundational exploratory visualization package for the R language.
129129
Based on `"The Grammar of Graphics" <https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html>`__ it
130130
provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data.
131-
It's really quite incredible. Various implementations to other languages are available,
132-
but a faithful implementation for Python users has long been missing. Although still young
133-
(as of Jan-2014), the `yhat/ggpy <https://github.com/yhat/ggpy>`__ project has been
134-
progressing quickly in that direction.
131+
Various implementations in other languages are available.
132+
A good implementation for Python users is `has2k1/plotnine <https://github.com/has2k1/plotnine/>`__.
135133

136134
`IPython Vega <https://github.com/vega/ipyvega>`__
137135
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/getting_started/install.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and
264264
pymysql 0.7.11 MySQL engine for sqlalchemy
265265
pyreadstat SPSS files (.sav) reading
266266
pytables 3.4.2 HDF5 reading / writing
267+
pyxlsb 1.0.5 Reading for xlsb files
267268
qtpy Clipboard I/O
268269
s3fs 0.3.0 Amazon S3 access
269270
tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)

doc/source/user_guide/io.rst

Lines changed: 70 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
2323
text;`JSON <https://www.json.org/>`__;:ref:`read_json<io.json_reader>`;:ref:`to_json<io.json_writer>`
2424
text;`HTML <https://en.wikipedia.org/wiki/HTML>`__;:ref:`read_html<io.read_html>`;:ref:`to_html<io.html>`
2525
text; Local clipboard;:ref:`read_clipboard<io.clipboard>`;:ref:`to_clipboard<io.clipboard>`
26-
binary;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
26+
;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
2727
binary;`OpenDocument <http://www.opendocumentformat.org>`__;:ref:`read_excel<io.ods>`;
2828
binary;`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__;:ref:`read_hdf<io.hdf5>`;:ref:`to_hdf<io.hdf5>`
2929
binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`
@@ -2768,7 +2768,8 @@ Excel files
27682768

27692769
The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``)
27702770
files using the ``xlrd`` Python module. Excel 2007+ (``.xlsx``) files
2771-
can be read using either ``xlrd`` or ``openpyxl``.
2771+
can be read using either ``xlrd`` or ``openpyxl``. Binary Excel (``.xlsb``)
2772+
files can be read using ``pyxlsb``.
27722773
The :meth:`~DataFrame.to_excel` instance method is used for
27732774
saving a ``DataFrame`` to Excel. Generally the semantics are
27742775
similar to working with :ref:`csv<io.read_csv_table>` data.
@@ -3229,6 +3230,30 @@ OpenDocument spreadsheets match what can be done for `Excel files`_ using
32293230
Currently pandas only supports *reading* OpenDocument spreadsheets. Writing
32303231
is not implemented.
32313232

3233+
.. _io.xlsb:
3234+
3235+
Binary Excel (.xlsb) files
3236+
--------------------------
3237+
3238+
.. versionadded:: 1.0.0
3239+
3240+
The :func:`~pandas.read_excel` method can also read binary Excel files
3241+
using the ``pyxlsb`` module. The semantics and features for reading
3242+
binary Excel files mostly match what can be done for `Excel files`_ using
3243+
``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types
3244+
in files and will return floats instead.
3245+
3246+
.. code-block:: python
3247+
3248+
# Returns a DataFrame
3249+
pd.read_excel('path_to_file.xlsb', engine='pyxlsb')
3250+
3251+
.. note::
3252+
3253+
Currently pandas only supports *reading* binary Excel files. Writing
3254+
is not implemented.
3255+
3256+
32323257
.. _io.clipboard:
32333258

32343259
Clipboard
@@ -4220,46 +4245,49 @@ Compression
42204245
all kinds of stores, not just tables. Two parameters are used to
42214246
control compression: ``complevel`` and ``complib``.
42224247

4223-
``complevel`` specifies if and how hard data is to be compressed.
4224-
``complevel=0`` and ``complevel=None`` disables
4225-
compression and ``0<complevel<10`` enables compression.
4226-
4227-
``complib`` specifies which compression library to use. If nothing is
4228-
specified the default library ``zlib`` is used. A
4229-
compression library usually optimizes for either good
4230-
compression rates or speed and the results will depend on
4231-
the type of data. Which type of
4232-
compression to choose depends on your specific needs and
4233-
data. The list of supported compression libraries:
4234-
4235-
- `zlib <https://zlib.net/>`_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow.
4236-
- `lzo <https://www.oberhumer.com/opensource/lzo/>`_: Fast compression and decompression.
4237-
- `bzip2 <http://bzip.org/>`_: Good compression rates.
4238-
- `blosc <http://www.blosc.org/>`_: Fast compression and decompression.
4239-
4240-
Support for alternative blosc compressors:
4241-
4242-
- `blosc:blosclz <http://www.blosc.org/>`_ This is the
4243-
default compressor for ``blosc``
4244-
- `blosc:lz4
4245-
<https://fastcompression.blogspot.dk/p/lz4.html>`_:
4246-
A compact, very popular and fast compressor.
4247-
- `blosc:lz4hc
4248-
<https://fastcompression.blogspot.dk/p/lz4.html>`_:
4249-
A tweaked version of LZ4, produces better
4250-
compression ratios at the expense of speed.
4251-
- `blosc:snappy <https://google.github.io/snappy/>`_:
4252-
A popular compressor used in many places.
4253-
- `blosc:zlib <https://zlib.net/>`_: A classic;
4254-
somewhat slower than the previous ones, but
4255-
achieving better compression ratios.
4256-
- `blosc:zstd <https://facebook.github.io/zstd/>`_: An
4257-
extremely well balanced codec; it provides the best
4258-
compression ratios among the others above, and at
4259-
reasonably fast speed.
4260-
4261-
If ``complib`` is defined as something other than the
4262-
listed libraries a ``ValueError`` exception is issued.
4248+
* ``complevel`` specifies if and how hard data is to be compressed.
4249+
``complevel=0`` and ``complevel=None`` disable compression and
4250+
``0<complevel<10`` enables compression.
4251+
4252+
* ``complib`` specifies which compression library to use.
4253+
If nothing is specified the default library ``zlib`` is used. A
4254+
compression library usually optimizes for either good compression rates
4255+
or speed and the results will depend on the type of data. Which type of
4256+
compression to choose depends on your specific needs and data. The list
4257+
of supported compression libraries:
4258+
4259+
- `zlib <https://zlib.net/>`_: The default compression library.
4260+
A classic in terms of compression, achieves good compression
4261+
rates but is somewhat slow.
4262+
- `lzo <https://www.oberhumer.com/opensource/lzo/>`_: Fast
4263+
compression and decompression.
4264+
- `bzip2 <http://bzip.org/>`_: Good compression rates.
4265+
- `blosc <http://www.blosc.org/>`_: Fast compression and
4266+
decompression.
4267+
4268+
Support for alternative blosc compressors:
4269+
4270+
- `blosc:blosclz <http://www.blosc.org/>`_: This is the
4271+
default compressor for ``blosc``.
4272+
- `blosc:lz4
4273+
<https://fastcompression.blogspot.dk/p/lz4.html>`_:
4274+
A compact, very popular and fast compressor.
4275+
- `blosc:lz4hc
4276+
<https://fastcompression.blogspot.dk/p/lz4.html>`_:
4277+
A tweaked version of LZ4, produces better
4278+
compression ratios at the expense of speed.
4279+
- `blosc:snappy <https://google.github.io/snappy/>`_:
4280+
A popular compressor used in many places.
4281+
- `blosc:zlib <https://zlib.net/>`_: A classic;
4282+
somewhat slower than the previous ones, but
4283+
achieving better compression ratios.
4284+
- `blosc:zstd <https://facebook.github.io/zstd/>`_: An
4285+
extremely well balanced codec; it provides the best
4286+
compression ratios among the others above, and at
4287+
reasonably fast speed.
4288+
4289+
If ``complib`` is defined as something other than the listed libraries a
4290+
``ValueError`` exception is issued.
42634291

42644292
.. note::
42654293

0 commit comments

Comments
 (0)