pandas-dev · jreback · Feb 17, 2014 · Jan 20, 2014
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -1504,6 +1504,18 @@ operators:
    a & b
    a - b
 
+Also available is the ``sym_diff`` method (or the ``^`` operator), which
+returns the elements that appear in either ``idx1`` or ``idx2`` but not in
+both. This is equivalent to the Index created by
+``(idx1 - idx2) + (idx2 - idx1)``, with duplicates dropped.
+
+.. ipython:: python
+
+   idx1 = Index([1, 2, 3, 4])
+   idx2 = Index([2, 3, 4, 5])
+   idx1.sym_diff(idx2)
+   idx1 ^ idx2
+
 The ``isin`` method of Index objects
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -54,6 +54,7 @@ New features
 ~~~~~~~~~~~~
 
 - Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`)
+- Added the ``sym_diff`` method to ``Index`` (:issue:`5543`)
 
 API Changes
 ~~~~~~~~~~~

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -866,6 +866,9 @@ def __and__(self, other):
     def __or__(self, other):
         return self.union(other)
 
+    def __xor__(self, other):
+        return self.sym_diff(other)  # ``a ^ b`` is a.sym_diff(b)
+
     def union(self, other):
         """
         Form the union of two Index objects and sorts if possible
@@ -973,16 +976,20 @@ def diff(self, other):
         """
         Compute sorted set difference of two Index objects
 
+        Parameters
+        ----------
+        other : Index or array-like
+
+        Returns
+        -------
+        diff : Index
+
         Notes
         -----
         One can do either of these and achieve the same result
 
         >>> index - index2
         >>> index.diff(index2)
-
-        Returns
-        -------
-        diff : Index
         """
 
         if not hasattr(other, '__iter__'):
@@ -1000,6 +1007,49 @@ def diff(self, other):
         theDiff = sorted(set(self) - set(other))
         return Index(theDiff, name=result_name)
 
+    def sym_diff(self, other, result_name=None):
+        """
+        Compute the sorted symmetric difference of two Index objects.
+
+        Parameters
+        ----------
+        other : Index or array-like
+            Must be iterable, otherwise a TypeError is raised.
+        result_name : str, optional
+            If None and ``other`` is not an Index, ``self.name`` is used.
+
+        Returns
+        -------
+        sym_diff : Index
+
+        Notes
+        -----
+        ``sym_diff`` contains elements that appear in either ``idx1`` or
+        ``idx2`` but not both. Equivalent to the Index created by
+        ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped.
+
+        Examples
+        --------
+        >>> idx1 = Index([1, 2, 3, 4])
+        >>> idx2 = Index([2, 3, 4, 5])
+        >>> idx1.sym_diff(idx2)
+        Int64Index([1, 5], dtype='int64')
+
+        You can also use the ``^`` operator:
+
+        >>> idx1 ^ idx2
+        Int64Index([1, 5], dtype='int64')
+        """
+        if not hasattr(other, '__iter__'):
+            raise TypeError('Input must be iterable!')
+
+        if not isinstance(other, Index):
+            other = Index(other)
+            result_name = self.name if result_name is None else result_name
+
+        the_diff = sorted(set((self - other) + (other - self)))
+        return Index(the_diff, name=result_name)
+
     def unique(self):
         """
         Return array of unique values in the Index. Significantly faster than

diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -471,6 +471,52 @@ def test_diff(self):
         # non-iterable input
         assertRaisesRegexp(TypeError, "iterable", first.diff, 0.5)
 
+    def test_symmetric_diff(self):
+        # smoke: Index against Index, the result carries no name
+        idx1 = Index([1, 2, 3, 4], name='idx1')
+        idx2 = Index([2, 3, 4, 5])
+        result = idx1.sym_diff(idx2)
+        expected = Index([1, 5])
+        self.assert_(tm.equalContents(result, expected))
+        self.assert_(result.name is None)
+
+        # __xor__ syntax: check the operator's own result, not sym_diff's
+        result = idx1 ^ idx2
+        self.assert_(tm.equalContents(result, expected))
+        self.assert_(result.name is None)
+
+        # multiIndex
+        idx1 = MultiIndex.from_tuples(self.tuples)
+        idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)])
+        result = idx1.sym_diff(idx2)
+        expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)])
+        self.assert_(tm.equalContents(result, expected))
+
+        # nans:
+        idx1 = Index([1, 2, np.nan])
+        idx2 = Index([0, 1, np.nan])
+        result = idx1.sym_diff(idx2)
+        expected = Index([0.0, np.nan, 2.0, np.nan])  # oddness with nans
+        nans = pd.isnull(expected)
+        self.assert_(pd.isnull(result[nans]).all())
+        self.assert_(tm.equalContents(result[~nans], expected[~nans]))
+
+        # other not an Index: name falls back to idx1's unless overridden
+        idx1 = Index([1, 2, 3, 4], name='idx1')
+        idx2 = np.array([2, 3, 4, 5])
+        expected = Index([1, 5])
+        result = idx1.sym_diff(idx2)
+        self.assert_(tm.equalContents(result, expected))
+        self.assertEquals(result.name, 'idx1')
+
+        result = idx1.sym_diff(idx2, result_name='new_name')
+        self.assert_(tm.equalContents(result, expected))
+        self.assertEquals(result.name, 'new_name')
+
+        # other isn't iterable: exercise sym_diff itself, not ``-`` (diff)
+        with tm.assertRaises(TypeError):
+            idx1.sym_diff(1)
+
     def test_pickle(self):
         def testit(index):
             pickled = pickle.dumps(index)