pandas-dev
diff --git a/‎.devcontainer.json
Lines changed: 28 additions & 0 deletions b/‎.devcontainer.json
Lines changed: 28 additions & 0 deletions
diff --git a/‎.travis.yml
Lines changed: 12 additions & 10 deletions b/‎.travis.yml
Lines changed: 12 additions & 10 deletions
diff --git a/‎Dockerfile
Lines changed: 47 additions & 0 deletions b/‎Dockerfile
Lines changed: 47 additions & 0 deletions
diff --git a/‎LICENSE
Lines changed: 3 additions & 1 deletion b/‎LICENSE
Lines changed: 3 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/reshape.py
Lines changed: 3 additions & 0 deletions b/‎asv_bench/benchmarks/reshape.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎ci/deps/azure-37-locale.yaml
Lines changed: 3 additions & 0 deletions b/‎ci/deps/azure-37-locale.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/‎ci/deps/azure-macos-36.yaml
Lines changed: 1 addition & 0 deletions b/‎ci/deps/azure-macos-36.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎ci/deps/azure-windows-37.yaml
Lines changed: 3 additions & 0 deletions b/‎ci/deps/azure-windows-37.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/‎ci/deps/travis-36-cov.yaml
Lines changed: 1 addition & 0 deletions b/‎ci/deps/travis-36-cov.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎ci/print_skipped.py
Lines changed: 1 addition & 1 deletion b/‎ci/print_skipped.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/make.py
Lines changed: 1 addition & 1 deletion b/‎doc/make.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/development/contributing.rst
Lines changed: 27 additions & 0 deletions b/‎doc/source/development/contributing.rst
Lines changed: 27 additions & 0 deletions
diff --git a/‎doc/source/ecosystem.rst
Lines changed: 4 additions & 6 deletions b/‎doc/source/ecosystem.rst
Lines changed: 4 additions & 6 deletions
diff --git a/‎doc/source/getting_started/install.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/getting_started/install.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 70 additions & 42 deletions b/‎doc/source/user_guide/io.rst
Lines changed: 70 additions & 42 deletions
@@ -0,0 +1,28 @@
+// For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at
+// https://github.com/microsoft/vscode-dev-containers/tree/master/containers/python-3-miniconda
+{
+	"name": "pandas",
+	"context": ".",
+	"dockerFile": "Dockerfile",
+
+	// Use 'settings' to set *default* container specific settings.json values on container create.
+	// You can edit these settings after create using File > Preferences > Settings > Remote.
+	"settings": {
+		"terminal.integrated.shell.linux": "/bin/bash",
+		"python.condaPath": "/opt/conda/bin/conda",
+		"python.pythonPath": "/opt/conda/bin/python",
+		"python.formatting.provider": "black",
+		"python.linting.enabled": true,
+		"python.linting.flake8Enabled": true,
+		"python.linting.pylintEnabled": false,
+		"python.linting.mypyEnabled": true,
+		"python.testing.pytestEnabled": true,
+		"python.testing.cwd": "pandas/tests"
+	},
+
+	// Add the IDs of extensions you want installed when the container is created in the array below.
+	"extensions": [
+		"ms-python.python",
+		"ms-vscode.cpptools"
+	]
+}
@@ -7,10 +7,10 @@ python: 3.7
 # travis cache --delete inside the project directory from the travis command line client
 # The cache directories will be deleted if anything in ci/ changes in a commit
 cache:
- ccache: true
- directories:
-  - $HOME/.cache # cython cache
-  - $HOME/.ccache # compiler cache
+  ccache: true
+  directories:
+    - $HOME/.cache # cython cache
+    - $HOME/.ccache # compiler cache
 
 env:
   global:
@@ -20,13 +20,13 @@ env:
     - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="
 
 git:
-    # for cloning
-    depth: false
+  # for cloning
+  depth: false
 
 matrix:
-    fast_finish: true
+  fast_finish: true
 
-    include:
+  include:
     - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
 
@@ -40,6 +40,9 @@ matrix:
         - postgresql
 
     - env:
+        # Enabling Deprecations when running tests
+        # PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
+        # See pandas/_testing.py for more details.
         - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
       services:
         - mysql
@@ -70,7 +73,6 @@ before_install:
   # This overrides travis and tells it to look nowhere.
   - export BOTO_CONFIG=/dev/null
 
-
 install:
   - echo "install start"
   - ci/prep_cython_cache.sh
@@ -87,5 +89,5 @@ script:
 after_script:
   - echo "after_script start"
   - source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
-  - ci/print_skipped.py 
+  - ci/print_skipped.py
   - echo "after_script done"
@@ -0,0 +1,47 @@
+FROM continuumio/miniconda3
+
+# if you forked pandas, you can pass in your own GitHub username to use your fork
+# e.g. --build-arg gh_username=myname
+ARG gh_username=pandas-dev
+ARG pandas_home="/home/pandas"
+
+# Avoid warnings by switching to noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Configure apt and install packages
+RUN apt-get update \
+    && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
+    #
+    # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
+    && apt-get -y install git iproute2 procps lsb-release \
+    #
+    # Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
+    # needed to build pandas C extensions
+    && apt-get -y install build-essential \
+    #
+    # cleanup
+    && apt-get autoremove -y \
+    && apt-get clean -y \
+    && rm -rf /var/lib/apt/lists/*
+
+# Switch back to dialog for any ad-hoc use of apt-get
+ENV DEBIAN_FRONTEND=dialog
+
+# Clone pandas repo
+RUN mkdir "$pandas_home" \
+    && git clone "https://github.com/$gh_username/pandas.git" "$pandas_home" \
+    && cd "$pandas_home" \
+    && git remote add upstream "https://github.com/pandas-dev/pandas.git" \
+    && git pull upstream master
+
+# Because it is surprisingly difficult to activate a conda environment inside a Dockerfile
+# (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89),
+# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
+#
+# Set up environment
+RUN conda env update -n base -f "$pandas_home/environment.yml"
+
+# Build C extensions and pandas
+RUN cd "$pandas_home" \
+    && python setup.py build_ext --inplace -j 4 \
+    && python -m pip install -e .
@@ -1,8 +1,10 @@
 BSD 3-Clause License
 
-Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.
 
+Copyright (c) 2011-2020, Open source contributors.
+
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
 
@@ -161,6 +161,9 @@ def time_pivot_table_categorical_observed(self):
             observed=True,
         )
 
+    def time_pivot_table_margins_only_column(self):
+        self.df.pivot_table(columns=["key2", "key3"], margins=True)
+
 
 class Crosstab:
     def setup(self):
 
@@ -34,3 +34,6 @@ dependencies:
   - xlsxwriter
   - xlwt
   - pyarrow>=0.15
+  - pip
+  - pip:
+    - pyxlsb
@@ -33,3 +33,4 @@ dependencies:
   - pip
   - pip:
     - pyreadstat
+    - pyxlsb
@@ -35,3 +35,6 @@ dependencies:
   - xlsxwriter
   - xlwt
   - pyreadstat
+  - pip
+  - pip:
+    - pyxlsb
@@ -51,3 +51,4 @@ dependencies:
     - coverage
     - pandas-datareader
     - python-dateutil
+    - pyxlsb
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import os
 import xml.etree.ElementTree as et
 
 
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 """
 Python script for building documentation.
 
 
@@ -146,6 +146,17 @@ requires a C compiler and Python environment. If you're making documentation
 changes, you can skip to :ref:`contributing.documentation` but you won't be able
 to build the documentation locally before pushing your changes.
 
+Using a Docker Container
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Instead of manually setting up a development environment, you can use Docker to
+automatically create the environment with just a few commands. Pandas provides a ``Dockerfile``
+in the root directory to build a Docker image with a full pandas development environment.
+
+Even easier, you can use the ``Dockerfile`` to launch a remote session with Visual Studio Code,
+a popular free IDE, using the ``.devcontainer.json`` file.
+See https://code.visualstudio.com/docs/remote/containers for details.
+
 .. _contributing.dev_c:
 
 Installing a C compiler
@@ -1525,3 +1536,19 @@ The branch will still exist on GitHub, so to delete it there do::
     git push origin --delete shiny-new-feature
 
 .. _Gitter: https://gitter.im/pydata/pandas
+
+
+Tips for a successful Pull Request
+==================================
+
+If you have made it to the `Review your code`_ phase, one of the core contributors may
+take a look. Please note however that a handful of people are responsible for reviewing
+all of the contributions, which can often lead to bottlenecks.
+
+To improve the chances of your pull request being reviewed, you should:
+
+- **Reference an open issue** for non-trivial changes to clarify the PR's purpose
+- **Ensure you have appropriate tests**. These should be the first part of any PR
+- **Keep your pull requests as simple as possible**. Larger PRs take longer to review
+- **Ensure that CI is in a green state**. Reviewers may not even look otherwise
+- **Keep** `Updating your pull request`_, either by request or every few days
@@ -122,16 +122,14 @@ also goes beyond matplotlib and pandas with the option to perform statistical
 estimation while plotting, aggregating across observations and visualizing the
 fit of statistical models to emphasize patterns in a dataset.
 
-`yhat/ggpy <https://github.com/yhat/ggpy>`__
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+`plotnine <https://github.com/has2k1/plotnine/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Hadley Wickham's `ggplot2 <https://ggplot2.tidyverse.org/>`__ is a foundational exploratory visualization package for the R language.
 Based on `"The Grammar of Graphics" <https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html>`__ it
 provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data.
-It's really quite incredible. Various implementations to other languages are available,
-but a faithful implementation for Python users has long been missing. Although still young
-(as of Jan-2014), the `yhat/ggpy <https://github.com/yhat/ggpy>`__ project has been
-progressing quickly in that direction.
+Various implementations in other languages are available.
+A good implementation for Python users is `has2k1/plotnine <https://github.com/has2k1/plotnine/>`__.
 
 `IPython Vega <https://github.com/vega/ipyvega>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -264,6 +264,7 @@ pyarrow                   0.12.0             Parquet, ORC (requires 0.13.0), and
 pymysql                   0.7.11             MySQL engine for sqlalchemy
 pyreadstat                                   SPSS files (.sav) reading
 pytables                  3.4.2              HDF5 reading / writing
+pyxlsb                    1.0.5              Reading for xlsb files
 qtpy                                         Clipboard I/O
 s3fs                      0.3.0              Amazon S3 access
 tabulate                  0.8.3              Printing in Markdown-friendly format (see `tabulate`_)
 
@@ -23,7 +23,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
     text;`JSON <https://www.json.org/>`__;:ref:`read_json<io.json_reader>`;:ref:`to_json<io.json_writer>`
     text;`HTML <https://en.wikipedia.org/wiki/HTML>`__;:ref:`read_html<io.read_html>`;:ref:`to_html<io.html>`
     text; Local clipboard;:ref:`read_clipboard<io.clipboard>`;:ref:`to_clipboard<io.clipboard>`
-    binary;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
+    ;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
     binary;`OpenDocument <http://www.opendocumentformat.org>`__;:ref:`read_excel<io.ods>`;
     binary;`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__;:ref:`read_hdf<io.hdf5>`;:ref:`to_hdf<io.hdf5>`
     binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`
@@ -2768,7 +2768,8 @@ Excel files
 
 The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``)
 files using the ``xlrd`` Python module.  Excel 2007+ (``.xlsx``) files
-can be read using either ``xlrd`` or ``openpyxl``.
+can be read using either ``xlrd`` or ``openpyxl``. Binary Excel (``.xlsb``)
+files can be read using ``pyxlsb``.
 The :meth:`~DataFrame.to_excel` instance method is used for
 saving a ``DataFrame`` to Excel.  Generally the semantics are
 similar to working with :ref:`csv<io.read_csv_table>` data.
@@ -3229,6 +3230,30 @@ OpenDocument spreadsheets match what can be done for `Excel files`_ using
    Currently pandas only supports *reading* OpenDocument spreadsheets. Writing
    is not implemented.
 
+.. _io.xlsb:
+
+Binary Excel (.xlsb) files
+--------------------------
+
+.. versionadded:: 1.0.0
+
+The :func:`~pandas.read_excel` method can also read binary Excel files
+using the ``pyxlsb`` module. The semantics and features for reading
+binary Excel files mostly match what can be done for `Excel files`_ using
+``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types
+in files and will return floats instead.
+
+.. code-block:: python
+
+   # Returns a DataFrame
+   pd.read_excel('path_to_file.xlsb', engine='pyxlsb')
+
+.. note::
+
+   Currently pandas only supports *reading* binary Excel files. Writing
+   is not implemented.
+
+
 .. _io.clipboard:
 
 Clipboard
@@ -4220,46 +4245,49 @@ Compression
 all kinds of stores, not just tables. Two parameters are used to
 control compression: ``complevel`` and ``complib``.
 
-``complevel`` specifies if and how hard data is to be compressed.
-              ``complevel=0`` and ``complevel=None`` disables
-              compression and ``0<complevel<10`` enables compression.
-
-``complib`` specifies which compression library to use. If nothing is
-            specified the default library ``zlib`` is used. A
-            compression library usually optimizes for either good
-            compression rates or speed and the results will depend on
-            the type of data. Which type of
-            compression to choose depends on your specific needs and
-            data. The list of supported compression libraries:
-
-             - `zlib <https://zlib.net/>`_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow.
-             - `lzo <https://www.oberhumer.com/opensource/lzo/>`_: Fast compression and decompression.
-             - `bzip2 <http://bzip.org/>`_: Good compression rates.
-             - `blosc <http://www.blosc.org/>`_: Fast compression and decompression.
-
-                Support for alternative blosc compressors:
-
-                - `blosc:blosclz <http://www.blosc.org/>`_ This is the
-                  default compressor for ``blosc``
-                - `blosc:lz4
-                  <https://fastcompression.blogspot.dk/p/lz4.html>`_:
-                  A compact, very popular and fast compressor.
-                - `blosc:lz4hc
-                  <https://fastcompression.blogspot.dk/p/lz4.html>`_:
-                  A tweaked version of LZ4, produces better
-                  compression ratios at the expense of speed.
-                - `blosc:snappy <https://google.github.io/snappy/>`_:
-                  A popular compressor used in many places.
-                - `blosc:zlib <https://zlib.net/>`_: A classic;
-                  somewhat slower than the previous ones, but
-                  achieving better compression ratios.
-                - `blosc:zstd <https://facebook.github.io/zstd/>`_: An
-                  extremely well balanced codec; it provides the best
-                  compression ratios among the others above, and at
-                  reasonably fast speed.
-
-             If ``complib`` is defined as something other than the
-             listed libraries a ``ValueError`` exception is issued.
+* ``complevel`` specifies if and how hard data is to be compressed.
+  ``complevel=0`` and ``complevel=None`` disable compression and
+  ``0<complevel<10`` enables compression.
+
+* ``complib`` specifies which compression library to use.
+  If nothing is specified, the default library ``zlib`` is used. A
+  compression library usually optimizes for either good compression rates
+  or speed and the results will depend on the type of data. Which type of
+  compression to choose depends on your specific needs and data. The list
+  of supported compression libraries:
+
+  - `zlib <https://zlib.net/>`_: The default compression library.
+    A classic in terms of compression, achieves good compression
+    rates but is somewhat slow.
+  - `lzo <https://www.oberhumer.com/opensource/lzo/>`_: Fast
+    compression and decompression.
+  - `bzip2 <http://bzip.org/>`_: Good compression rates.
+  - `blosc <http://www.blosc.org/>`_: Fast compression and
+    decompression.
+
+    Support for alternative blosc compressors:
+
+    - `blosc:blosclz <http://www.blosc.org/>`_: This is the
+      default compressor for ``blosc``.
+    - `blosc:lz4
+      <https://fastcompression.blogspot.dk/p/lz4.html>`_:
+      A compact, very popular and fast compressor.
+    - `blosc:lz4hc
+      <https://fastcompression.blogspot.dk/p/lz4.html>`_:
+      A tweaked version of LZ4, produces better
+      compression ratios at the expense of speed.
+    - `blosc:snappy <https://google.github.io/snappy/>`_:
+      A popular compressor used in many places.
+    - `blosc:zlib <https://zlib.net/>`_: A classic;
+      somewhat slower than the previous ones, but
+      achieving better compression ratios.
+    - `blosc:zstd <https://facebook.github.io/zstd/>`_: An
+      extremely well balanced codec; it provides the best
+      compression ratios among the others above, and at
+      reasonably fast speed.
+
+  If ``complib`` is defined as something other than the listed libraries a
+  ``ValueError`` exception is issued.
 
 .. note::
Original file line number	Diff line number	Diff line change
`@@ -161,6 +161,9 @@ def time_pivot_table_categorical_observed(self):`
`161`	`161`	`observed=True,`
`162`	`162`	`)`
`163`	`163`
	`164`	`+ def time_pivot_table_margins_only_column(self):`
	`165`	`+ self.df.pivot_table(columns=["key2", "key3"], margins=True)`
	`166`	`+`
`164`	`167`
`165`	`168`	`class Crosstab:`
`166`	`169`	`def setup(self):`
-Original file line number
+Diff line change
   - pip
   - pip:
     - pyreadstat
 +    - pyxlsb
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-#!/usr/bin/env python`
	`1`	`+#!/usr/bin/env python3`
`2`	`2`	`import os`
`3`	`3`	`import xml.etree.ElementTree as et`
`4`	`4`