
Commit 5d04093

Author: Artemy Kolchinsky (committed)

Merge remote-tracking branch 'upstream/master' into sparse_with_nancols

2 parents: e9ed3d8 + 0f899f4

29 files changed: +706 / -129 lines

doc/README.rst

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ If you want to do a full clean build, do::

     python make.py build

-Staring with 0.13.1 you can tell ``make.py`` to compile only a single section
+Starting with 0.13.1 you can tell ``make.py`` to compile only a single section
 of the docs, greatly reducing the turn-around time for checking your changes.
 You will be prompted to delete `.rst` files that aren't required, since the
 last committed version can always be restored from git.

doc/source/categorical.rst

Lines changed: 6 additions & 3 deletions
@@ -541,8 +541,12 @@ The same applies to ``df.append(df_different)``.
 Getting Data In/Out
 -------------------

-Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype will currently
-raise ``NotImplementedError``.
+.. versionadded:: 0.15.2
+
+Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype was implemented
+in 0.15.2. See :ref:`here <io.hdf5-categorical>` for an example and caveats.
+
+Writing data to/from Stata format files was implemented in 0.15.2.

 Writing to a CSV file will convert the data, effectively removing any information about the
 categorical (categories and ordering). So if you read back the CSV file you have to convert the

@@ -805,4 +809,3 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse `Categorical
 This also happens in some cases when you supply a `numpy` array instead of a `Categorical`:
 using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behaviour, while using
 a string array (e.g. ``np.array(["a","b","c","a"])``) will not.
-

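A minimal sketch of the CSV caveat quoted in this hunk, with an in-memory buffer standing in for a real file (the column name ``cats`` is illustrative):

.. code-block:: python

   # Category information is lost on to_csv(); restore it by hand after
   # reading back. StringIO stands in for a file on disk.
   from io import StringIO
   import pandas as pd

   df = pd.DataFrame({"cats": pd.Series(list("abba")).astype("category")})
   csv = df.to_csv(index=False)                   # categories/ordering dropped
   df2 = pd.read_csv(StringIO(csv))
   df2["cats"] = df2["cats"].astype("category")   # convert back manually
   print(df2.dtypes)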
doc/source/groupby.rst

Lines changed: 15 additions & 0 deletions
@@ -338,6 +338,21 @@ In the case of grouping by multiple keys, the group name will be a tuple:
 It's standard Python-fu but remember you can unpack the tuple in the for loop
 statement if you wish: ``for (k1, k2), group in grouped:``.

+Selecting a group
+-----------------
+
+A single group can be selected using ``GroupBy.get_group()``:
+
+.. ipython:: python
+
+   grouped.get_group('bar')
+
+Or for an object grouped on multiple columns:
+
+.. ipython:: python
+
+   df.groupby(['A', 'B']).get_group(('bar', 'one'))
+
 .. _groupby.aggregate:

 Aggregation
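The snippet above relies on ``grouped`` and ``df`` defined earlier on the docs page; a self-contained sketch, with a small stand-in frame assumed in place of the docs' running example:

.. code-block:: python

   # Stand-in for the running example in the groupby docs.
   import numpy as np
   import pandas as pd

   df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar'],
                      'B': ['one', 'one', 'two', 'two'],
                      'C': np.random.randn(4)})

   grouped = df.groupby('A')
   print(grouped.get_group('bar'))                           # single key
   print(df.groupby(['A', 'B']).get_group(('bar', 'one')))   # tuple of keys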

doc/source/io.rst

Lines changed: 70 additions & 0 deletions
@@ -1992,6 +1992,27 @@ indices to be parsed.

   read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3])

+.. note::
+
+   It is possible to transform the contents of Excel cells via the `converters`
+   option. For instance, to convert a column to boolean:
+
+   .. code-block:: python
+
+      read_excel('path_to_file.xls', 'Sheet1', converters={'MyBools': bool})
+
+   This option handles missing values and treats exceptions in the converters
+   as missing data. Transformations are applied cell by cell rather than to the
+   column as a whole, so the array dtype is not guaranteed. For instance, a
+   column of integers with missing values cannot be transformed to an array
+   with integer dtype, because NaN is strictly a float. You can manually mask
+   missing data to recover integer dtype:
+
+   .. code-block:: python
+
+      cfun = lambda x: int(x) if x else -1
+      read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun})
+
 To write a DataFrame object to a sheet of an Excel file, you can use the
 ``to_excel`` instance method. The arguments are largely the same as ``to_csv``
 described above, the first argument being the name of the excel file, and the
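A runnable form of the converters note above, assuming a local spreadsheet exists; the file name and the ``MyBools``/``MyInts`` column names are placeholders:

.. code-block:: python

   import pandas as pd

   cfun = lambda x: int(x) if x else -1      # mask missing cells as -1
   df = pd.read_excel('path_to_file.xls', 'Sheet1',
                      converters={'MyBools': bool, 'MyInts': cfun})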
@@ -3070,6 +3091,53 @@ conversion may not be necessary in future versions of pandas)
   df
   df.dtypes

+.. _io.hdf5-categorical:
+
+Categorical Data
+~~~~~~~~~~~~~~~~
+
+.. versionadded:: 0.15.2
+
+Writing data to a ``HDFStore`` that contains a ``category`` dtype was implemented
+in 0.15.2. Queries work the same as if it were an object array, but the ``category``
+dtyped data is stored in a more efficient manner.
+
+.. ipython:: python
+
+   dfcat = DataFrame({ 'A' : Series(list('aabbcdba')).astype('category'),
+                       'B' : np.random.randn(8) })
+   dfcat
+   dfcat.dtypes
+   cstore = pd.HDFStore('cats.h5', mode='w')
+   cstore.append('dfcat', dfcat, format='table', data_columns=['A'])
+   result = cstore.select('dfcat', where="A in ['b','c']")
+   result
+   result.dtypes
+
+.. warning::
+
+   The format of the ``Categorical`` is readable by prior versions of pandas (< 0.15.2),
+   but those versions will retrieve the data as an integer-based column (i.e. the ``codes``).
+   The ``categories`` *can* still be retrieved, but the user must select them manually
+   using the explicit meta path.
+
+   The data is stored like so:
+
+   .. ipython:: python
+
+      cstore
+
+      # to get the categories
+      cstore.select('dfcat/meta/A/meta')
+
+.. ipython:: python
+   :suppress:
+   :okexcept:
+
+   cstore.close()
+   import os
+   os.remove('cats.h5')
+
 String Columns
 ~~~~~~~~~~~~~~

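A hedged sketch of the recovery path the warning describes, under the assumption that an older pandas has returned the integer codes for column ``A``; the recombination step is illustrative, not part of the commit:

.. code-block:: python

   import pandas as pd

   store = pd.HDFStore('cats.h5')
   codes = store.select('dfcat')['A']          # integer codes under pandas < 0.15.2
   cats = store.select('dfcat/meta/A/meta')    # the stored categories
   store.close()

   recombined = pd.Categorical.from_codes(codes.values, categories=cats.values)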
@@ -3639,6 +3707,8 @@ outside of this range, the data is cast to ``int16``.
 data frames containing categorical data will convert non-string categorical values
 to strings.

+Writing data to/from Stata format files with a ``category`` dtype was implemented in 0.15.2.
+
 .. _io.stata_reader:

 Reading from STATA format
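A minimal sketch of the Stata round-trip added here, assuming pandas >= 0.15.2; ``cats.dta`` is an illustrative file name, and ``read_stata``'s default ``convert_categoricals=True`` handles the conversion back:

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'grade': pd.Series(['a', 'b', 'a', 'c']).astype('category')})
   df.to_stata('cats.dta')
   back = pd.read_stata('cats.dta')       # value labels -> category again
   print(back['grade'].dtype)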

doc/source/remote_data.rst

Lines changed: 62 additions & 7 deletions
@@ -27,14 +27,14 @@ Remote Data Access

 .. _remote_data.data_reader:

-Functions from :mod:`pandas.io.data` extract data from various Internet
-sources into a DataFrame. Currently the following sources are supported:
+Functions from :mod:`pandas.io.data` and :mod:`pandas.io.ga` extract data from
+various Internet sources into a DataFrame. Currently the following sources
+are supported:

-- Yahoo! Finance
-- Google Finance
-- St. Louis FED (FRED)
-- Kenneth French's data library
-- World Bank
+- :ref:`Yahoo! Finance<remote_data.yahoo>`
+- :ref:`Google Finance<remote_data.google>`
+- :ref:`St. Louis FED (FRED)<remote_data.fred>`
+- :ref:`Kenneth French's data library<remote_data.ff>`
+- :ref:`World Bank<remote_data.wb>`
+- :ref:`Google Analytics<remote_data.ga>`

 It should be noted that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.

@@ -330,7 +330,62 @@ indicators, or a single "bad" (#4 above) country code).

 See docstrings for more info.

+.. _remote_data.ga:
+
+Google Analytics
+----------------
+
+The :mod:`~pandas.io.ga` module provides a wrapper for the
+`Google Analytics API <https://developers.google.com/analytics/devguides>`__
+to simplify retrieving traffic data.
+Result sets are parsed into a pandas DataFrame with a shape and data types
+derived from the source table.
+
+Configuring Access to Google Analytics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The first thing you need to do is to set up access to the Google Analytics API. Follow the steps below:
+
+#. In the `Google Developers Console <https://console.developers.google.com>`__
+
+   #. enable the Analytics API
+   #. create a new project
+   #. create a new Client ID for an "Installed Application" (in the "APIs & auth / Credentials" section of the newly created project)
+   #. download it (JSON file)
+
+#. On your machine
+
+   #. rename it to ``client_secrets.json``
+   #. move it to the ``pandas/io`` module directory
+
+The first time you use the :func:`read_ga` function, a browser window will open and ask you to authenticate to the Google API. Do proceed.
+
+Using the Google Analytics API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following will fetch users and pageviews (metrics) data per day of the week, for the first semester of 2014, from a particular property.
+
+.. code-block:: python
+
+   import pandas.io.ga as ga
+   ga.read_ga(
+       account_id  = "2360420",
+       profile_id  = "19462946",
+       property_id = "UA-2360420-5",
+       metrics     = ['users', 'pageviews'],
+       dimensions  = ['dayOfWeek'],
+       start_date  = "2014-01-01",
+       end_date    = "2014-08-01",
+       index_col   = 0,
+       filters     = "pagePath=~aboutus;ga:country==France",
+   )
+
+The only mandatory arguments are ``metrics``, ``dimensions`` and ``start_date``. We strongly recommend that you always specify the ``account_id``, ``profile_id`` and ``property_id``, to avoid accessing the wrong data bucket in Google Analytics.
+
+The ``index_col`` argument indicates which dimension(s) should be used as the index.
+
+The ``filters`` argument indicates the filtering to apply to the query. In the above example, the page URL has to contain ``aboutus`` AND the visitor's country has to be France.
+
+More detailed information can be found in the following:
+
+* `pandas & google analytics, by yhat <http://blog.yhathq.com/posts/pandas-google-analytics.html>`__
+* `Google Analytics integration in pandas, by Chang She <http://quantabee.wordpress.com/2012/12/17/google-analytics-pandas/>`__
+* `Google Analytics Dimensions and Metrics Reference <https://developers.google.com/analytics/devguides/reporting/core/dimsmets>`_

doc/source/whatsnew/v0.15.2.txt

Lines changed: 10 additions & 2 deletions
@@ -42,6 +42,9 @@ Enhancements
 ~~~~~~~~~~~~

 - Added ability to export Categorical data to Stata (:issue:`8633`).
+- Added ability to export Categorical data to/from HDF5 (:issue:`7621`). Queries work the same as if it were an object array, but the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here <io.hdf5-categorical>` for an example and caveats w.r.t. prior versions of pandas.
+- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on the `Timestamp` class (:issue:`5351`).
+- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See :ref:`here<remote_data.ga>`.

 .. _whatsnew_0152.performance:

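A quick look at the three `Timestamp` classmethods named above; note that ``fromtimestamp()`` uses local time, so its output depends on the machine's timezone:

.. code-block:: python

   import datetime
   import pandas as pd

   print(pd.Timestamp.utcfromtimestamp(0))     # 1970-01-01 00:00:00
   print(pd.Timestamp.fromtimestamp(0))        # the epoch, in local time
   print(pd.Timestamp.combine(datetime.date(2014, 1, 1),
                              datetime.time(9, 30)))   # 2014-01-01 09:30:00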
@@ -58,12 +61,14 @@ Experimental

 Bug Fixes
 ~~~~~~~~~
+- Bug in packaging pandas with ``py2app/cx_Freeze`` (:issue:`8602`, :issue:`8831`)
 - Bug in ``groupby`` signatures that didn't include \*args or \*\*kwargs (:issue:`8733`).
 - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`).
+- Unclear error message in csv parsing when passing dtype and names and the parsed data is a different data type (:issue:`8833`)
 - Bug in slicing a multi-index with an empty list and at least one boolean indexer (:issue:`8781`)
 - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`).
 - ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
-
+- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)

@@ -93,4 +98,7 @@ Bug Fixes

 - Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference
   when the index contained DST days (:issue:`8772`).
-- Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`)
+- Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`).
+
+- Bugs when trying to stack multiple columns, when some (or all)
+  of the level names are numbers (:issue:`8584`).

pandas/computation/pytables.py

Lines changed: 16 additions & 1 deletion
@@ -147,7 +147,17 @@ def is_in_table(self):
     @property
     def kind(self):
         """ the kind of my field """
-        return self.queryables.get(self.lhs)
+        return getattr(self.queryables.get(self.lhs), 'kind', None)
+
+    @property
+    def meta(self):
+        """ the meta of my field """
+        return getattr(self.queryables.get(self.lhs), 'meta', None)
+
+    @property
+    def metadata(self):
+        """ the metadata of my field """
+        return getattr(self.queryables.get(self.lhs), 'metadata', None)

     def generate(self, v):
         """ create and return the op string for this TermValue """

@@ -167,6 +177,7 @@ def stringify(value):
             return encoder(value)

         kind = _ensure_decoded(self.kind)
+        meta = _ensure_decoded(self.meta)
         if kind == u('datetime64') or kind == u('datetime'):
             if isinstance(v, (int, float)):
                 v = stringify(v)

@@ -182,6 +193,10 @@ def stringify(value):
         elif kind == u('timedelta64') or kind == u('timedelta'):
             v = _coerce_scalar_to_timedelta_type(v, unit='s').value
             return TermValue(int(v), v, kind)
+        elif meta == u('category'):
+            metadata = com._values_from_object(self.metadata)
+            result = metadata.searchsorted(v, side='left')
+            return TermValue(result, result, u('integer'))
         elif kind == u('integer'):
             v = int(float(v))
             return TermValue(v, v, kind)
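A toy sketch of what the new ``category`` branch does: the stored categories act as per-column metadata, and ``searchsorted`` maps a query value to its integer code so the comparison can run against the codes column (names and values here are illustrative):

.. code-block:: python

   import numpy as np

   categories = np.array(['a', 'b', 'c'])    # stored under the column's meta path
   codes = np.array([0, 0, 1, 2, 1])         # what the table actually holds

   code = categories.searchsorted('b', side='left')   # -> 1
   mask = codes == code                               # rows where A == 'b'
   print(code, mask)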

pandas/core/categorical.py

Lines changed: 10 additions & 2 deletions
@@ -319,6 +319,15 @@ def ndim(self):
         """Number of dimensions of the Categorical """
         return self._codes.ndim

+    def reshape(self, new_shape, **kwargs):
+        """ compat with .reshape """
+        return self
+
+    @property
+    def base(self):
+        """ compat, we are always our own object """
+        return None
+
     @classmethod
     def from_array(cls, data, **kwargs):
         """

@@ -363,10 +372,9 @@ def from_codes(cls, codes, categories, ordered=False, name=None):

         categories = cls._validate_categories(categories)

-        if codes.max() >= len(categories) or codes.min() < -1:
+        if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
             raise ValueError("codes need to be between -1 and len(categories)-1")

-
         return Categorical(codes, categories=categories, ordered=ordered, name=name, fastpath=True)

     _codes = None
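The new ``len(codes)`` guard matters because ``max()``/``min()`` raise on an empty array; a quick check of the behavior the fix enables:

.. code-block:: python

   import numpy as np
   import pandas as pd

   empty = pd.Categorical.from_codes(np.array([], dtype='int64'),
                                     categories=['a', 'b'])
   print(len(empty))   # 0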

pandas/core/internals.py

Lines changed: 1 addition & 1 deletion
@@ -4381,7 +4381,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
         else:
             fill_value = upcasted_na

-        if self.is_null:
+        if self.is_null and not getattr(self.block, 'is_categorical', None):
             missing_arr = np.empty(self.shape, dtype=empty_dtype)
             if np.prod(self.shape):
                 # NumPy 1.6 workaround: this statement gets strange if all
