diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 0c60aeeae333b..96f725bd0b22e 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -307,6 +307,29 @@ Google BigQuery Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`). +.. _whatsnew_0190.sparse: + +Sparse changes +~~~~~~~~~~~~~~ + +These changes allow pandas to handle sparse data with more dtypes, and for work to make a smoother experience with data handling. + +- Sparse data structure now can preserve ``dtype`` after arithmetic ops (:issue:`13848`) + +.. ipython:: python + + s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) + s.dtype + + s + 1 + + +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) +- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) +- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) +- Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) + .. 
_whatsnew_0190.enhancements.other: Other enhancements @@ -754,11 +777,6 @@ Bug Fixes - Bug in ``groupby().shift()``, which could cause a segfault or corruption in rare circumstances when grouping by columns with missing values (:issue:`13813`) - Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`) - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) -- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) -- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) -- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) -- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) -- Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) - Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) - Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) - Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index a0dbb35bffe92..8c821f1e91874 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -48,16 +48,14 @@ def wrapper(self, other): raise AssertionError("length mismatch: %d vs. 
%d" % (len(self), len(other))) if not isinstance(other, ABCSparseArray): - other = SparseArray(other, fill_value=self.fill_value) - if name[0] == 'r': - return _sparse_array_op(other, self, op, name[1:]) - else: - return _sparse_array_op(self, other, op, name) + dtype = getattr(other, 'dtype', None) + other = SparseArray(other, fill_value=self.fill_value, + dtype=dtype) + return _sparse_array_op(self, other, op, name) elif is_scalar(other): - new_fill_value = op(np.float64(self.fill_value), np.float64(other)) - + fill = op(_get_fill(self), np.asarray(other)) return _wrap_result(name, op(self.sp_values, other), - self.sp_index, new_fill_value) + self.sp_index, fill) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) @@ -67,33 +65,74 @@ def wrapper(self, other): return wrapper -def _sparse_array_op(left, right, op, name): - if left.sp_index.equals(right.sp_index): - result = op(left.sp_values, right.sp_values) - result_index = left.sp_index +def _maybe_match_dtype(left, right): + if not hasattr(right, 'dtype'): + return left.dtype + elif left.dtype == right.dtype: + return getattr(left.dtype, '__name__', left.dtype) else: - sparse_op = getattr(splib, 'sparse_%s' % name) - result, result_index = sparse_op(left.sp_values, left.sp_index, - left.fill_value, right.sp_values, - right.sp_index, right.fill_value) + # ToDo: to be supported after GH 667 + raise NotImplementedError('dtypes must be identical') + + +def _get_fill(arr): + # coerce fill_value to arr dtype if possible + # int64 SparseArray can have NaN as fill_value if there is no missing try: - fill_value = op(left.fill_value, right.fill_value) - except: - fill_value = nan - return _wrap_result(name, result, result_index, fill_value) + return np.asarray(arr.fill_value, dtype=arr.dtype) + except ValueError: + return np.asarray(arr.fill_value) -def _wrap_result(name, data, sparse_index, fill_value): +def _sparse_array_op(left, right, op, name, series=False): + + if series and 
is_integer_dtype(left) and is_integer_dtype(right): + # series coerces to float64 if result should have NaN/inf + if name in ('floordiv', 'mod') and (right.values == 0).any(): + left = left.astype(np.float64) + right = right.astype(np.float64) + elif name in ('rfloordiv', 'rmod') and (left.values == 0).any(): + left = left.astype(np.float64) + right = right.astype(np.float64) + + dtype = _maybe_match_dtype(left, right) + + if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: + result = op(left.get_values(), right.get_values()) + + if left.sp_index.ngaps == 0: + index = left.sp_index + else: + index = right.sp_index + fill = op(_get_fill(left), _get_fill(right)) + elif left.sp_index.equals(right.sp_index): + result = op(left.sp_values, right.sp_values) + index = left.sp_index + fill = op(_get_fill(left), _get_fill(right)) + else: + if name[0] == 'r': + left, right = right, left + name = name[1:] + + opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype) + sparse_op = getattr(splib, opname) + + result, index, fill = sparse_op(left.sp_values, left.sp_index, + left.fill_value, right.sp_values, + right.sp_index, right.fill_value) + return _wrap_result(name, result, index, fill, dtype=result.dtype) + + +def _wrap_result(name, data, sparse_index, fill_value, dtype=None): """ wrap op result to have correct dtype """ if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): # ToDo: We can remove this condition when removing # SparseArray's dtype default when closing GH 667 - return SparseArray(data, sparse_index=sparse_index, - fill_value=fill_value, - dtype=np.bool) - else: - return SparseArray(data, sparse_index=sparse_index, - fill_value=fill_value) + dtype = np.bool + elif name == 'truediv': + dtype = np.float64 + return SparseArray(data, sparse_index=sparse_index, + fill_value=fill_value, dtype=dtype) class SparseArray(PandasObject, np.ndarray): @@ -419,7 +458,12 @@ def astype(self, dtype=None): dtype = np.dtype(dtype) if dtype is not None and dtype not in 
(np.float_, float): raise TypeError('Can only support floating point data for now') - return self.copy() + + if self.dtype == dtype: + return self.copy() + else: + return self._simple_new(self.sp_values.astype(dtype), + self.sp_index, float(self.fill_value)) def copy(self, deep=True): """ diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 6c4392dbf7cb4..dc72bc550a0af 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -57,16 +57,9 @@ def wrapper(self, other): elif isinstance(other, DataFrame): return NotImplemented elif is_scalar(other): - if isnull(other) or isnull(self.fill_value): - new_fill_value = np.nan - else: - new_fill_value = op(np.float64(self.fill_value), - np.float64(other)) - - return self._constructor(op(self.sp_values, other), + new_values = op(self.values, other) + return self._constructor(new_values, index=self.index, - sparse_index=self.sp_index, - fill_value=new_fill_value, name=self.name) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) @@ -84,7 +77,8 @@ def _sparse_series_op(left, right, op, name): new_index = left.index new_name = _maybe_match_name(left, right) - result = _sparse_array_op(left, right, op, name) + result = _sparse_array_op(left.values, right.values, op, name, + series=True) return left._constructor(result, index=new_index, name=new_name) diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/sparse/tests/test_arithmetics.py new file mode 100644 index 0000000000000..87efc362581cd --- /dev/null +++ b/pandas/sparse/tests/test_arithmetics.py @@ -0,0 +1,346 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as tm + + +class TestSparseArrayArithmetics(tm.TestCase): + + _multiprocess_can_split_ = True + + _base = np.array + _klass = pd.SparseArray + + def _assert(self, a, b): + tm.assert_numpy_array_equal(a, b) + + def _check_numeric_ops(self, a, b, a_dense, b_dense): + # sparse & sparse + self._assert((a + b).to_dense(), a_dense 
+ b_dense) + self._assert((b + a).to_dense(), b_dense + a_dense) + + self._assert((a - b).to_dense(), a_dense - b_dense) + self._assert((b - a).to_dense(), b_dense - a_dense) + + self._assert((a * b).to_dense(), a_dense * b_dense) + self._assert((b * a).to_dense(), b_dense * a_dense) + + # pandas uses future division + self._assert((a / b).to_dense(), a_dense * 1.0 / b_dense) + self._assert((b / a).to_dense(), b_dense * 1.0 / a_dense) + + # ToDo: FIXME in GH 13843 + if not (self._base == pd.Series and a.dtype == 'int64'): + self._assert((a // b).to_dense(), a_dense // b_dense) + self._assert((b // a).to_dense(), b_dense // a_dense) + + self._assert((a % b).to_dense(), a_dense % b_dense) + self._assert((b % a).to_dense(), b_dense % a_dense) + + self._assert((a ** b).to_dense(), a_dense ** b_dense) + self._assert((b ** a).to_dense(), b_dense ** a_dense) + + # sparse & dense + self._assert((a + b_dense).to_dense(), a_dense + b_dense) + self._assert((b_dense + a).to_dense(), b_dense + a_dense) + + self._assert((a - b_dense).to_dense(), a_dense - b_dense) + self._assert((b_dense - a).to_dense(), b_dense - a_dense) + + self._assert((a * b_dense).to_dense(), a_dense * b_dense) + self._assert((b_dense * a).to_dense(), b_dense * a_dense) + + # pandas uses future division + self._assert((a / b_dense).to_dense(), a_dense * 1.0 / b_dense) + self._assert((b_dense / a).to_dense(), b_dense * 1.0 / a_dense) + + # ToDo: FIXME in GH 13843 + if not (self._base == pd.Series and a.dtype == 'int64'): + self._assert((a // b_dense).to_dense(), a_dense // b_dense) + self._assert((b_dense // a).to_dense(), b_dense // a_dense) + + self._assert((a % b_dense).to_dense(), a_dense % b_dense) + self._assert((b_dense % a).to_dense(), b_dense % a_dense) + + self._assert((a ** b_dense).to_dense(), a_dense ** b_dense) + self._assert((b_dense ** a).to_dense(), b_dense ** a_dense) + + def _check_bool_result(self, res): + tm.assertIsInstance(res, self._klass) + self.assertEqual(res.dtype, np.bool) + 
self.assertIsInstance(res.fill_value, bool) + + def _check_comparison_ops(self, a, b, a_dense, b_dense): + # sparse & sparse + self._check_bool_result(a == b) + self._assert((a == b).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b) + self._assert((a != b).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b) + self._assert((a >= b).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b) + self._assert((a <= b).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b) + self._assert((a > b).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b) + self._assert((a < b).to_dense(), a_dense < b_dense) + + # sparse & dense + self._check_bool_result(a == b_dense) + self._assert((a == b_dense).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b_dense) + self._assert((a != b_dense).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b_dense) + self._assert((a >= b_dense).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b_dense) + self._assert((a <= b_dense).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b_dense) + self._assert((a > b_dense).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b_dense) + self._assert((a < b_dense).to_dense(), a_dense < b_dense) + + def test_float_scalar(self): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + for kind in ['integer', 'block']: + a = self._klass(values, kind=kind) + self._check_numeric_ops(a, 1, values, 1) + self._check_numeric_ops(a, 0, values, 0) + self._check_numeric_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=0) + self._check_numeric_ops(a, 1, values, 1) + self._check_numeric_ops(a, 0, values, 0) + self._check_numeric_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=2) + self._check_numeric_ops(a, 1, values, 1) + self._check_numeric_ops(a, 0, values, 0) + self._check_numeric_ops(a, 3, values, 3) + + def 
test_float_scalar_comparison(self): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + for kind in ['integer', 'block']: + a = self._klass(values, kind=kind) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=0) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=2) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + def test_float_same_index(self): + # when sp_index are the same + for kind in ['integer', 'block']: + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues) + + values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.]) + rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.]) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues) + + def test_float_same_index_comparison(self): + # when sp_index are the same + for kind in ['integer', 'block']: + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.]) + rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.]) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + 
self._check_comparison_ops(a, b, values, rvalues) + + def test_float_array(self): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + for kind in ['integer', 'block']: + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues) + self._check_numeric_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues) + + def test_float_array_different_kind(self): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind='integer') + b = self._klass(rvalues, kind='block') + self._check_numeric_ops(a, b, values, rvalues) + self._check_numeric_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, kind='integer', fill_value=0) + b = self._klass(rvalues, kind='block') + self._check_numeric_ops(a, b, values, rvalues) + + a = self._klass(values, kind='integer', fill_value=0) + b = self._klass(rvalues, kind='block', fill_value=0) + self._check_numeric_ops(a, b, values, rvalues) + + a = self._klass(values, kind='integer', fill_value=1) + b = self._klass(rvalues, kind='block', fill_value=2) + self._check_numeric_ops(a, b, values, rvalues) + + def test_float_array_comparison(self): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + for kind in ['integer', 'block']: + a = self._klass(values, kind=kind) + b = 
self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_int_array(self): + # have to specify dtype explicitly until fixing GH 667 + dtype = np.int64 + + values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + for kind in ['integer', 'block']: + a = self._klass(values, dtype=dtype, kind=kind) + self.assertEqual(a.dtype, dtype) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self.assertEqual(b.dtype, dtype) + + self._check_numeric_ops(a, b, values, rvalues) + self._check_numeric_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) + self.assertEqual(a.dtype, dtype) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self.assertEqual(b.dtype, dtype) + + self._check_numeric_ops(a, b, values, rvalues) + + a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) + self.assertEqual(a.dtype, dtype) + b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind) + self.assertEqual(b.dtype, dtype) + self._check_numeric_ops(a, b, values, rvalues) + + a = self._klass(values, fill_value=1, dtype=dtype, kind=kind) + self.assertEqual(a.dtype, dtype) + b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind) + self.assertEqual(b.dtype, dtype) + self._check_numeric_ops(a, b, values, rvalues) + + def test_int_array_comparison(self): + values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0]) + 
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0]) + + dtype = np.int64 + + for kind in ['integer', 'block']: + a = self._klass(values, dtype=dtype, kind=kind) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=0) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=0) + b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=1) + b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + +class TestSparseSeriesArithmetic(TestSparseArrayArithmetics): + + _base = pd.Series + _klass = pd.SparseSeries + + def _assert(self, a, b): + tm.assert_series_equal(a, b) + + def _check_bool_result(self, res): + # ToDo: Must return SparseSeries after GH 667 + tm.assertIsInstance(res, self._base) + self.assertEqual(res.dtype, np.bool) + + def test_alignment(self): + da = pd.Series(np.arange(4)) + db = pd.Series(np.arange(4), index=[1, 2, 3, 4]) + + sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0) + sb = pd.SparseSeries(np.arange(4), index=[1, 2, 3, 4], + dtype=np.int64, fill_value=0) + self._check_numeric_ops(sa, sb, da, db) + + sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan) + sb = pd.SparseSeries(np.arange(4), index=[1, 2, 3, 4], + dtype=np.int64, fill_value=np.nan) + self._check_numeric_ops(sa, sb, da, db) + + da = pd.Series(np.arange(4)) + db = pd.Series(np.arange(4), index=[10, 11, 12, 13]) + + sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0) + sb = pd.SparseSeries(np.arange(4), index=[10, 11, 12, 13], + dtype=np.int64, fill_value=0) + 
self._check_numeric_ops(sa, sb, da, db) + + sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan) + sb = pd.SparseSeries(np.arange(4), index=[10, 11, 12, 13], + dtype=np.int64, fill_value=np.nan) + self._check_numeric_ops(sa, sb, da, db) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index dd2126d0f52d2..2dccb571b1d75 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -539,195 +539,6 @@ def test_fillna_overlap(self): tm.assert_sp_array_equal(res, exp) -class TestSparseArrayArithmetic(tm.TestCase): - - _multiprocess_can_split_ = True - - def _check_numeric_ops(self, a, b, a_dense, b_dense): - tm.assert_numpy_array_equal((a + b).to_dense(), a_dense + b_dense) - tm.assert_numpy_array_equal((b + a).to_dense(), b_dense + a_dense) - - tm.assert_numpy_array_equal((a - b).to_dense(), a_dense - b_dense) - tm.assert_numpy_array_equal((b - a).to_dense(), b_dense - a_dense) - - tm.assert_numpy_array_equal((a * b).to_dense(), a_dense * b_dense) - tm.assert_numpy_array_equal((b * a).to_dense(), b_dense * a_dense) - - tm.assert_numpy_array_equal((a / b).to_dense(), a_dense / b_dense) - tm.assert_numpy_array_equal((b / a).to_dense(), b_dense / a_dense) - - tm.assert_numpy_array_equal((a // b).to_dense(), a_dense // b_dense) - tm.assert_numpy_array_equal((b // a).to_dense(), b_dense // a_dense) - - tm.assert_numpy_array_equal((a % b).to_dense(), a_dense % b_dense) - tm.assert_numpy_array_equal((b % a).to_dense(), b_dense % a_dense) - - tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense) - tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense) - - def _check_comparison_ops(self, a, b, a_dense, b_dense): - - def _check(res): - tm.assertIsInstance(res, SparseArray) - self.assertEqual(res.dtype, np.bool) - self.assertIsInstance(res.fill_value, bool) - - _check(a == b) - tm.assert_numpy_array_equal((a == b).to_dense(), a_dense == b_dense) - - _check(a != b) - 
tm.assert_numpy_array_equal((a != b).to_dense(), a_dense != b_dense) - - _check(a >= b) - tm.assert_numpy_array_equal((a >= b).to_dense(), a_dense >= b_dense) - - _check(a <= b) - tm.assert_numpy_array_equal((a <= b).to_dense(), a_dense <= b_dense) - - _check(a > b) - tm.assert_numpy_array_equal((a > b).to_dense(), a_dense > b_dense) - - _check(a < b) - tm.assert_numpy_array_equal((a < b).to_dense(), a_dense < b_dense) - - def test_float_scalar(self): - values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - - for kind in ['integer', 'block']: - a = SparseArray(values, kind=kind) - self._check_numeric_ops(a, 1, values, 1) - self._check_numeric_ops(a, 0, values, 0) - self._check_numeric_ops(a, 3, values, 3) - - a = SparseArray(values, kind=kind, fill_value=0) - self._check_numeric_ops(a, 1, values, 1) - self._check_numeric_ops(a, 0, values, 0) - self._check_numeric_ops(a, 3, values, 3) - - a = SparseArray(values, kind=kind, fill_value=2) - self._check_numeric_ops(a, 1, values, 1) - self._check_numeric_ops(a, 0, values, 0) - self._check_numeric_ops(a, 3, values, 3) - - def test_float_scalar_comparison(self): - values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - - for kind in ['integer', 'block']: - a = SparseArray(values, kind=kind) - self._check_comparison_ops(a, 1, values, 1) - self._check_comparison_ops(a, 0, values, 0) - self._check_comparison_ops(a, 3, values, 3) - - a = SparseArray(values, kind=kind, fill_value=0) - self._check_comparison_ops(a, 1, values, 1) - self._check_comparison_ops(a, 0, values, 0) - self._check_comparison_ops(a, 3, values, 3) - - a = SparseArray(values, kind=kind, fill_value=2) - self._check_comparison_ops(a, 1, values, 1) - self._check_comparison_ops(a, 0, values, 0) - self._check_comparison_ops(a, 3, values, 3) - - def test_float_same_index(self): - # when sp_index are the same - for kind in ['integer', 'block']: - values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - rvalues = np.array([np.nan, 
2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) - - a = SparseArray(values, kind=kind) - b = SparseArray(rvalues, kind=kind) - self._check_numeric_ops(a, b, values, rvalues) - - values = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.]) - rvalues = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.]) - - a = SparseArray(values, kind=kind, fill_value=0) - b = SparseArray(rvalues, kind=kind, fill_value=0) - self._check_numeric_ops(a, b, values, rvalues) - - def test_float_same_index_comparison(self): - # when sp_index are the same - for kind in ['integer', 'block']: - values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) - - a = SparseArray(values, kind=kind) - b = SparseArray(rvalues, kind=kind) - self._check_comparison_ops(a, b, values, rvalues) - - values = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.]) - rvalues = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.]) - - a = SparseArray(values, kind=kind, fill_value=0) - b = SparseArray(rvalues, kind=kind, fill_value=0) - self._check_comparison_ops(a, b, values, rvalues) - - def test_float_array(self): - values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) - - for kind in ['integer', 'block']: - a = SparseArray(values, kind=kind) - b = SparseArray(rvalues, kind=kind) - self._check_numeric_ops(a, b, values, rvalues) - self._check_numeric_ops(a, b * 0, values, rvalues * 0) - - a = SparseArray(values, kind=kind, fill_value=0) - b = SparseArray(rvalues, kind=kind) - self._check_numeric_ops(a, b, values, rvalues) - - a = SparseArray(values, kind=kind, fill_value=0) - b = SparseArray(rvalues, kind=kind, fill_value=0) - self._check_numeric_ops(a, b, values, rvalues) - - a = SparseArray(values, kind=kind, fill_value=1) - b = SparseArray(rvalues, kind=kind, fill_value=2) - self._check_numeric_ops(a, b, values, rvalues) - - def test_float_array_different_kind(self): - values = 
np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) - - a = SparseArray(values, kind='integer') - b = SparseArray(rvalues, kind='block') - self._check_numeric_ops(a, b, values, rvalues) - self._check_numeric_ops(a, b * 0, values, rvalues * 0) - - a = SparseArray(values, kind='integer', fill_value=0) - b = SparseArray(rvalues, kind='block') - self._check_numeric_ops(a, b, values, rvalues) - - a = SparseArray(values, kind='integer', fill_value=0) - b = SparseArray(rvalues, kind='block', fill_value=0) - self._check_numeric_ops(a, b, values, rvalues) - - a = SparseArray(values, kind='integer', fill_value=1) - b = SparseArray(rvalues, kind='block', fill_value=2) - self._check_numeric_ops(a, b, values, rvalues) - - def test_float_array_comparison(self): - values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) - rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) - - for kind in ['integer', 'block']: - a = SparseArray(values, kind=kind) - b = SparseArray(rvalues, kind=kind) - self._check_comparison_ops(a, b, values, rvalues) - self._check_comparison_ops(a, b * 0, values, rvalues * 0) - - a = SparseArray(values, kind=kind, fill_value=0) - b = SparseArray(rvalues, kind=kind) - self._check_comparison_ops(a, b, values, rvalues) - - a = SparseArray(values, kind=kind, fill_value=0) - b = SparseArray(rvalues, kind=kind, fill_value=0) - self._check_comparison_ops(a, b, values, rvalues) - - a = SparseArray(values, kind=kind, fill_value=1) - b = SparseArray(rvalues, kind=kind, fill_value=2) - self._check_comparison_ops(a, b, values, rvalues) - - class TestSparseArrayAnalytics(tm.TestCase): def test_sum(self): data = np.arange(10).astype(float) diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index 1f88d22bd8f93..7d520e9677933 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -134,6 +134,7 @@ def 
test_loc_index(self): # sparse array (actuary it coerces to normal Series) result = sparse.loc[sparse % 2 == 1] + # GH 13144: the boolean sparse comparison is used directly as an indexer exp = orig.loc[orig % 2 == 1].to_sparse() tm.assert_sp_series_equal(result, exp) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 11bf980a99fec..4417411403baa 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -486,13 +486,14 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xfill = 0 yfill = 2 - result_block_vals, rb_index = sparse_op(x, xindex, xfill, y, - yindex, yfill) - result_int_vals, ri_index = sparse_op(x, xdindex, xfill, y, - ydindex, yfill) + result_block_vals, rb_index, bfill = sparse_op(x, xindex, xfill, y, + yindex, yfill) + result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill, y, + ydindex, yfill) self.assertTrue(rb_index.to_int_index().equals(ri_index)) tm.assert_numpy_array_equal(result_block_vals, result_int_vals) + self.assertEqual(bfill, ifill) # check versus Series... 
xseries = Series(x, xdindex.indices) @@ -517,7 +518,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): def make_optestf(op): def f(self): - sparse_op = getattr(splib, 'sparse_%s' % op) + sparse_op = getattr(splib, 'sparse_%s_float64' % op) python_op = getattr(operator, op) self._op_tests(sparse_op, python_op) diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index f9ac7d9d34072..4c3b340c9f6f1 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -512,6 +512,7 @@ def test_setslice(self): name=self.bseries.name)) def test_operators(self): + def _check_op(a, b, op): sp_result = op(a, b) adense = a.to_dense() if isinstance(a, SparseSeries) else a @@ -781,7 +782,7 @@ def test_fill_value_corner(self): cop2 = self.zbseries.copy() cop2.fill_value = 1 result = cop2 / cop - self.assertTrue(np.isnan(result.fill_value)) + self.assertEqual(result.fill_value, np.inf) def test_fill_value_when_combine_const(self): # GH12723 @@ -1239,6 +1240,7 @@ def _dense_series_compare(s, f): class TestSparseSeriesAnalytics(tm.TestCase): + def setUp(self): arr, index = _test_data1() self.bseries = SparseSeries(arr, index=index, kind='block', diff --git a/pandas/src/sparse.pyx b/pandas/src/sparse.pyx index 94ae26e00f087..9908aef592ad3 100644 --- a/pandas/src/sparse.pyx +++ b/pandas/src/sparse.pyx @@ -1,4 +1,5 @@ -from numpy cimport ndarray, uint8_t, int32_t, float64_t +from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, + float64_t, float32_t, float16_t) cimport numpy as np cimport cython @@ -754,346 +755,9 @@ cdef class BlockUnion(BlockMerge): #------------------------------------------------------------------------------- # Sparse arithmetic -ctypedef float64_t (* double_func)(float64_t a, float64_t b) +include "sparse_op_helper.pxi" -cdef inline tuple sparse_combine(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill, - double_func op): - if 
isinstance(xindex, BlockIndex): - return block_op(x, xindex.to_block_index(), xfill, - y, yindex.to_block_index(), yfill, op) - elif isinstance(xindex, IntIndex): - return int_op(x, xindex.to_int_index(), xfill, - y, yindex.to_int_index(), yfill, op) - - -@cython.boundscheck(False) -cdef inline tuple block_op(ndarray x_, BlockIndex xindex, float64_t xfill, - ndarray y_, BlockIndex yindex, float64_t yfill, - double_func op): - """ - Binary operator on BlockIndex objects with fill values - """ - - cdef: - BlockIndex out_index - Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices - Py_ssize_t xbp = 0, ybp = 0 # block positions - int32_t xloc, yloc - Py_ssize_t xblock = 0, yblock = 0 # block numbers - - ndarray[float64_t, ndim=1] x, y - ndarray[float64_t, ndim=1] out - - # to suppress Cython warning - x = x_ - y = y_ - - out_index = xindex.make_union(yindex) - out = np.empty(out_index.npoints, dtype=np.float64) - - # Wow, what a hack job. Need to do something about this - - # walk the two SparseVectors, adding matched locations... 
- for out_i from 0 <= out_i < out_index.npoints: - if yblock == yindex.nblocks: - # use y fill value - out[out_i] = op(x[xi], yfill) - xi += 1 - - # advance x location - xbp += 1 - if xbp == xindex.lenbuf[xblock]: - xblock += 1 - xbp = 0 - continue - - if xblock == xindex.nblocks: - # use x fill value - out[out_i] = op(xfill, y[yi]) - yi += 1 - - # advance y location - ybp += 1 - if ybp == yindex.lenbuf[yblock]: - yblock += 1 - ybp = 0 - continue - - yloc = yindex.locbuf[yblock] + ybp - xloc = xindex.locbuf[xblock] + xbp - - # each index in the out_index had to come from either x, y, or both - if xloc == yloc: - out[out_i] = op(x[xi], y[yi]) - xi += 1 - yi += 1 - - # advance both locations - xbp += 1 - if xbp == xindex.lenbuf[xblock]: - xblock += 1 - xbp = 0 - - ybp += 1 - if ybp == yindex.lenbuf[yblock]: - yblock += 1 - ybp = 0 - - elif xloc < yloc: - # use y fill value - out[out_i] = op(x[xi], yfill) - xi += 1 - - # advance x location - xbp += 1 - if xbp == xindex.lenbuf[xblock]: - xblock += 1 - xbp = 0 - else: - # use x fill value - out[out_i] = op(xfill, y[yi]) - yi += 1 - - # advance y location - ybp += 1 - if ybp == yindex.lenbuf[yblock]: - yblock += 1 - ybp = 0 - - return out, out_index - - -@cython.boundscheck(False) -cdef inline tuple int_op(ndarray x_, IntIndex xindex, float64_t xfill, - ndarray y_, IntIndex yindex, float64_t yfill, - double_func op): - cdef: - IntIndex out_index - Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices - int32_t xloc, yloc - ndarray[int32_t, ndim=1] xindices, yindices, out_indices - ndarray[float64_t, ndim=1] x, y - ndarray[float64_t, ndim=1] out - - # suppress Cython compiler warnings due to inlining - x = x_ - y = y_ - - # need to do this first to know size of result array - out_index = xindex.make_union(yindex) - out = np.empty(out_index.npoints, dtype=np.float64) - - xindices = xindex.indices - yindices = yindex.indices - out_indices = out_index.indices - - # walk the two SparseVectors, adding matched locations... 
- for out_i from 0 <= out_i < out_index.npoints: - if xi == xindex.npoints: - # use x fill value - out[out_i] = op(xfill, y[yi]) - yi += 1 - continue - - if yi == yindex.npoints: - # use y fill value - out[out_i] = op(x[xi], yfill) - xi += 1 - continue - - xloc = xindices[xi] - yloc = yindices[yi] - - # each index in the out_index had to come from either x, y, or both - if xloc == yloc: - out[out_i] = op(x[xi], y[yi]) - xi += 1 - yi += 1 - elif xloc < yloc: - # use y fill value - out[out_i] = op(x[xi], yfill) - xi += 1 - else: - # use x fill value - out[out_i] = op(xfill, y[yi]) - yi += 1 - - return out, out_index - -cdef inline float64_t __add(float64_t a, float64_t b): - return a + b - -cdef inline float64_t __sub(float64_t a, float64_t b): - return a - b - -cdef inline float64_t __rsub(float64_t a, float64_t b): - return b - a - -cdef inline float64_t __div(float64_t a, float64_t b): - if b == 0: - if a > 0: - return INF - elif a < 0: - return -INF - else: - return NaN - else: - return a / b - -cdef inline float64_t __rdiv(float64_t a, float64_t b): - return __div(b, a) - -cdef inline float64_t __floordiv(float64_t a, float64_t b): - if b == 0: - # numpy >= 1.11 returns NaN - # for a // 0, rather than +-inf - if _np_version_under1p11: - if a > 0: - return INF - elif a < 0: - return -INF - return NaN - else: - return a // b - -cdef inline float64_t __rfloordiv(float64_t a, float64_t b): - return __floordiv(b, a) - -cdef inline float64_t __mul(float64_t a, float64_t b): - return a * b - -cdef inline float64_t __eq(float64_t a, float64_t b): - return a == b - -cdef inline float64_t __ne(float64_t a, float64_t b): - return a != b - -cdef inline float64_t __lt(float64_t a, float64_t b): - return a < b - -cdef inline float64_t __gt(float64_t a, float64_t b): - return a > b - -cdef inline float64_t __le(float64_t a, float64_t b): - return a <= b - -cdef inline float64_t __ge(float64_t a, float64_t b): - return a >= b - -cdef inline float64_t __mod(float64_t a, 
float64_t b): - if b == 0: - return NaN - else: - return a % b - -cdef inline float64_t __rmod(float64_t a, float64_t b): - return __mod(b, a) - -cdef inline float64_t __pow(float64_t a, float64_t b): - return a ** b - -cdef inline float64_t __rpow(float64_t a, float64_t b): - return __pow(b, a) - - -# This probably needs to be "templated" to achieve maximum performance. -# TODO: quantify performance boost to "templating" - -cpdef sparse_add(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __add) - -cpdef sparse_sub(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __sub) - -cpdef sparse_rsub(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rsub) - -cpdef sparse_mul(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __mul) - -cpdef sparse_div(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __div) - -cpdef sparse_rdiv(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rdiv) - -sparse_truediv = sparse_div -sparse_rtruediv = sparse_rdiv - -cpdef sparse_floordiv(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __floordiv) - -cpdef sparse_rfloordiv(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, 
yfill, __rfloordiv) - -cpdef sparse_mod(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __mod) - -cpdef sparse_rmod(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rmod) - -cpdef sparse_pow(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __pow) - -cpdef sparse_rpow(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rpow) - -cpdef sparse_eq(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __eq) - -cpdef sparse_ne(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __ne) - -cpdef sparse_lt(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __lt) - -cpdef sparse_gt(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __gt) - -cpdef sparse_le(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __le) - -cpdef sparse_ge(ndarray x, SparseIndex xindex, float64_t xfill, - ndarray y, SparseIndex yindex, float64_t yfill): - return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __ge) - #------------------------------------------------------------------------------- # Indexing operations 
diff --git a/pandas/src/sparse_op_helper.pxi b/pandas/src/sparse_op_helper.pxi new file mode 100644 index 0000000000000..a49036d02896c --- /dev/null +++ b/pandas/src/sparse_op_helper.pxi @@ -0,0 +1,5532 @@ +""" +Template for each `dtype` helper function for sparse ops + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +#---------------------------------------------------------------------- +# Sparse op +#---------------------------------------------------------------------- + +cdef inline float64_t __div_float64(float64_t a, float64_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + +cdef inline float64_t __truediv_float64(float64_t a, float64_t b): + return __div_float64(a, b) + +cdef inline float64_t __floordiv_float64(float64_t a, float64_t b): + if b == 0: + # numpy >= 1.11 returns NaN + # for a // 0, rather than +-inf + if _np_version_under1p11: + if a > 0: + return INF + elif a < 0: + return -INF + return NaN + else: + return a // b + +cdef inline float64_t __mod_float64(float64_t a, float64_t b): + if b == 0: + return NaN + else: + return a % b + +cdef inline float64_t __div_int64(int64_t a, int64_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + +cdef inline float64_t __truediv_int64(int64_t a, int64_t b): + return __div_int64(a, b) + +cdef inline int64_t __floordiv_int64(int64_t a, int64_t b): + if b == 0: + return 0 + else: + return a // b + +cdef inline int64_t __mod_int64(int64_t a, int64_t b): + if b == 0: + return 0 + else: + return a % b + +#---------------------------------------------------------------------- +# sparse array op +#---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_add_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + 
BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] + y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill + yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_add_float64(ndarray x_, IntIndex xindex, + 
float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] + y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + + return out, out_index, xfill + yfill + + +cpdef sparse_add_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_add_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_add_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_add_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = 
np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = x[i] + y[i] + return out + + +cpdef sparse_fill_add_float64(float64_t xfill, + float64_t yfill): + return xfill + yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_add_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] + y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill + yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_add_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched 
locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] + y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] + yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill + y[yi] + yi += 1 + + return out, out_index, xfill + yfill + + +cpdef sparse_add_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_add_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_add_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_add_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[int64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.int64) + + for i in range(len(x)): + out[i] = x[i] + y[i] + return out + + +cpdef sparse_fill_add_int64(int64_t xfill, + int64_t yfill): + return xfill + yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_sub_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, 
ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] - y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill - yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_sub_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due 
to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] - y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + + return out, out_index, xfill - yfill + + +cpdef sparse_sub_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_sub_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_sub_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_sub_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = x[i] - y[i] + return out + + +cpdef sparse_fill_sub_float64(float64_t xfill, + float64_t yfill): + return xfill - yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_sub_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray 
y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] - y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill - yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_sub_int64(ndarray x_, IntIndex xindex, + 
int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] - y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] - yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill - y[yi] + yi += 1 + + return out, out_index, xfill - yfill + + +cpdef sparse_sub_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_sub_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_sub_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_sub_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[int64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.int64) + + for 
i in range(len(x)): + out[i] = x[i] - y[i] + return out + + +cpdef sparse_fill_sub_int64(int64_t xfill, + int64_t yfill): + return xfill - yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_mul_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] * y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill 
value + out[out_i] = xfill * y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill * yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_mul_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] * y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + + return out, out_index, xfill * yfill + + +cpdef sparse_mul_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_mul_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_mul_float64(x, 
xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_mul_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = x[i] * y[i] + return out + + +cpdef sparse_fill_mul_float64(float64_t xfill, + float64_t yfill): + return xfill * yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_mul_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] * y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill * yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_mul_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched 
locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] * y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] * yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill * y[yi] + yi += 1 + + return out, out_index, xfill * yfill + + +cpdef sparse_mul_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_mul_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_mul_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_mul_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[int64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.int64) + + for i in range(len(x)): + out[i] = x[i] * y[i] + return out + + +cpdef sparse_fill_mul_int64(int64_t xfill, + int64_t yfill): + return xfill * yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_div_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, 
ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __div_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __div_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __div_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __div_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __div_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __div_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_div_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, 
ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __div_float64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __div_float64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __div_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __div_float64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __div_float64(xfill, y[yi]) + yi += 1 + + return out, out_index, __div_float64(xfill, yfill) + + +cpdef sparse_div_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_div_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_div_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_div_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = __div_float64(x[i], y[i]) + return out + + +cpdef sparse_fill_div_float64(float64_t xfill, + float64_t 
yfill): + return __div_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_div_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __div_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __div_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __div_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __div_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __div_int64(xfill, y[yi]) + yi += 1 + 
+ # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __div_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_div_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __div_int64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __div_int64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __div_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __div_int64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __div_int64(xfill, y[yi]) + yi += 1 + + return out, out_index, __div_int64(xfill, yfill) + + +cpdef sparse_div_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_div_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return 
int_op_div_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_div_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = __div_int64(x[i], y[i]) + return out + + +cpdef sparse_fill_div_int64(int64_t xfill, + int64_t yfill): + return __div_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_mod_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __mod_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __mod_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __mod_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __mod_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __mod_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __mod_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_mod_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = 
yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __mod_float64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __mod_float64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __mod_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __mod_float64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __mod_float64(xfill, y[yi]) + yi += 1 + + return out, out_index, __mod_float64(xfill, yfill) + + +cpdef sparse_mod_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_mod_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_mod_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_mod_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = __mod_float64(x[i], y[i]) + return out + + +cpdef sparse_fill_mod_float64(float64_t xfill, + float64_t yfill): + return __mod_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_mod_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + 
+ cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __mod_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __mod_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __mod_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __mod_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __mod_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __mod_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_mod_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + 
int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __mod_int64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __mod_int64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __mod_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __mod_int64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __mod_int64(xfill, y[yi]) + yi += 1 + + return out, out_index, __mod_int64(xfill, yfill) + + +cpdef sparse_mod_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_mod_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_mod_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_mod_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[int64_t, ndim=1] out + + out = np.empty(len(x), 
dtype=np.int64) + + for i in range(len(x)): + out[i] = __mod_int64(x[i], y[i]) + return out + + +cpdef sparse_fill_mod_int64(int64_t xfill, + int64_t yfill): + return __mod_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_truediv_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __truediv_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __truediv_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __truediv_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __truediv_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __truediv_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __truediv_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_truediv_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = 
xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __truediv_float64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __truediv_float64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __truediv_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __truediv_float64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __truediv_float64(xfill, y[yi]) + yi += 1 + + return out, out_index, __truediv_float64(xfill, yfill) + + +cpdef sparse_truediv_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_truediv_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_truediv_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_truediv_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = __truediv_float64(x[i], y[i]) + return out + + +cpdef sparse_fill_truediv_float64(float64_t xfill, + float64_t yfill): + return __truediv_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_truediv_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + 
int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __truediv_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __truediv_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __truediv_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __truediv_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __truediv_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __truediv_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) 
+cdef inline tuple int_op_truediv_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __truediv_int64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __truediv_int64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __truediv_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __truediv_int64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __truediv_int64(xfill, y[yi]) + yi += 1 + + return out, out_index, __truediv_int64(xfill, yfill) + + +cpdef sparse_truediv_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_truediv_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_truediv_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_truediv_int64(ndarray[int64_t, 
ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = __truediv_int64(x[i], y[i]) + return out + + +cpdef sparse_fill_truediv_int64(int64_t xfill, + int64_t yfill): + return __truediv_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_floordiv_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __floordiv_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __floordiv_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __floordiv_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __floordiv_float64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __floordiv_float64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __floordiv_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_floordiv_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = 
xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __floordiv_float64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __floordiv_float64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __floordiv_float64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __floordiv_float64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __floordiv_float64(xfill, y[yi]) + yi += 1 + + return out, out_index, __floordiv_float64(xfill, yfill) + + +cpdef sparse_floordiv_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_floordiv_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_floordiv_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_floordiv_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = __floordiv_float64(x[i], y[i]) + return out + + +cpdef sparse_fill_floordiv_float64(float64_t xfill, + float64_t yfill): + return __floordiv_float64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_floordiv_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + 
BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = __floordiv_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = __floordiv_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __floordiv_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = __floordiv_int64(x[xi], yfill) + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = __floordiv_int64(xfill, y[yi]) + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, __floordiv_int64(xfill, yfill) + + +@cython.wraparound(False) 
+@cython.boundscheck(False) +cdef inline tuple int_op_floordiv_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = __floordiv_int64(xfill, y[yi]) + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = __floordiv_int64(x[xi], yfill) + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = __floordiv_int64(x[xi], y[yi]) + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = __floordiv_int64(x[xi], yfill) + xi += 1 + else: + # use x fill value + out[out_i] = __floordiv_int64(xfill, y[yi]) + yi += 1 + + return out, out_index, __floordiv_int64(xfill, yfill) + + +cpdef sparse_floordiv_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_floordiv_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_floordiv_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef 
sparse_align_floordiv_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[int64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.int64) + + for i in range(len(x)): + out[i] = __floordiv_int64(x[i], y[i]) + return out + + +cpdef sparse_fill_floordiv_int64(int64_t xfill, + int64_t yfill): + return __floordiv_int64(xfill, yfill) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_pow_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] ** y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill ** yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_pow_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[float64_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.float64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, 
adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] ** y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + + return out, out_index, xfill ** yfill + + +cpdef sparse_pow_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_pow_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_pow_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_pow_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[float64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.float64) + + for i in range(len(x)): + out[i] = x[i] ** y[i] + return out + + +cpdef sparse_fill_pow_float64(float64_t xfill, + float64_t yfill): + return xfill ** yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_pow_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, 
yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] ** y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill ** yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_pow_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[int64_t, ndim=1] out + + 
# suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.int64) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] ** y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] ** yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill ** y[yi] + yi += 1 + + return out, out_index, xfill ** yfill + + +cpdef sparse_pow_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_pow_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_pow_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_pow_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[int64_t, ndim=1] out + + out = np.empty(len(x), dtype=np.int64) + + for i in range(len(x)): + out[i] = x[i] ** y[i] + return out + + +cpdef sparse_fill_pow_int64(int64_t xfill, + int64_t yfill): + return xfill ** yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_eq_float64(ndarray x_, + BlockIndex xindex, + float64_t 
xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] == y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill == yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_eq_float64(ndarray 
x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] == y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + + return out, out_index, xfill == yfill + + +cpdef sparse_eq_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_eq_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_eq_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_eq_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out 
+ + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] == y[i] + return out + + +cpdef sparse_fill_eq_float64(float64_t xfill, + float64_t yfill): + return xfill == yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_eq_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] == y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill == yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_eq_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched 
locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] == y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] == yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill == y[yi] + yi += 1 + + return out, out_index, xfill == yfill + + +cpdef sparse_eq_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_eq_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_eq_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_eq_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] == y[i] + return out + + +cpdef sparse_fill_eq_int64(int64_t xfill, + int64_t yfill): + return xfill == yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_ne_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, 
ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] != y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill != yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_ne_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due 
to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] != y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + + return out, out_index, xfill != yfill + + +cpdef sparse_ne_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_ne_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_ne_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_ne_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] != y[i] + return out + + +cpdef sparse_fill_ne_float64(float64_t xfill, + float64_t yfill): + return xfill != yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_ne_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + 
BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] != y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill != yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_ne_int64(ndarray x_, IntIndex xindex, + int64_t 
xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] != y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] != yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill != y[yi] + yi += 1 + + return out, out_index, xfill != yfill + + +cpdef sparse_ne_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_ne_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_ne_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_ne_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in 
range(len(x)): + out[i] = x[i] != y[i] + return out + + +cpdef sparse_fill_ne_int64(int64_t xfill, + int64_t yfill): + return xfill != yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_lt_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] < y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + 
out[out_i] = xfill < y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill < yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_lt_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] < y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + + return out, out_index, xfill < yfill + + +cpdef sparse_lt_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_lt_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_lt_float64(x, xindex.to_int_index(), 
xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_lt_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] < y[i] + return out + + +cpdef sparse_fill_lt_float64(float64_t xfill, + float64_t yfill): + return xfill < yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_lt_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] < y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill < yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_lt_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched 
locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] < y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] < yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill < y[yi] + yi += 1 + + return out, out_index, xfill < yfill + + +cpdef sparse_lt_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_lt_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_lt_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_lt_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] < y[i] + return out + + +cpdef sparse_fill_lt_int64(int64_t xfill, + int64_t yfill): + return xfill < yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_gt_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] 
x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] > y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill > yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_gt_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + 
x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] > y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + + return out, out_index, xfill > yfill + + +cpdef sparse_gt_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_gt_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_gt_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_gt_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] > y[i] + return out + + +cpdef sparse_fill_gt_float64(float64_t xfill, + float64_t yfill): + return xfill > yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_gt_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + 
int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] > y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill > yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_gt_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, 
IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] > y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] > yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill > y[yi] + yi += 1 + + return out, out_index, xfill > yfill + + +cpdef sparse_gt_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_gt_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_gt_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_gt_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = 
x[i] > y[i] + return out + + +cpdef sparse_fill_gt_int64(int64_t xfill, + int64_t yfill): + return xfill > yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_le_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] <= y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill <= 
y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill <= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_le_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] <= y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + + return out, out_index, xfill <= yfill + + +cpdef sparse_le_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_le_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_le_float64(x, xindex.to_int_index(), xfill, + y, 
yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_le_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] <= y[i] + return out + + +cpdef sparse_fill_le_float64(float64_t xfill, + float64_t yfill): + return xfill <= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_le_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... 
+ for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] <= y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill <= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_le_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched 
locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] <= y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] <= yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill <= y[yi] + yi += 1 + + return out, out_index, xfill <= yfill + + +cpdef sparse_le_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_le_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_le_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_le_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] <= y[i] + return out + + +cpdef sparse_fill_le_int64(int64_t xfill, + int64_t yfill): + return xfill <= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_ge_float64(ndarray x_, + BlockIndex xindex, + float64_t xfill, + ndarray y_, + BlockIndex yindex, + float64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[float64_t, 
ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] >= y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill >= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_ge_float64(ndarray x_, IntIndex xindex, + float64_t xfill, + ndarray y_, IntIndex yindex, + float64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[float64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due 
to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] >= y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + + return out, out_index, xfill >= yfill + + +cpdef sparse_ge_float64(ndarray[float64_t, ndim=1] x, + SparseIndex xindex, float64_t xfill, + ndarray[float64_t, ndim=1] y, + SparseIndex yindex, float64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_ge_float64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_ge_float64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_ge_float64(ndarray[float64_t, ndim=1] x, + ndarray[float64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in range(len(x)): + out[i] = x[i] >= y[i] + return out + + +cpdef sparse_fill_ge_float64(float64_t xfill, + float64_t yfill): + return xfill >= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_ge_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + 
BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] >= y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill >= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_ge_int64(ndarray x_, IntIndex xindex, + int64_t 
xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] >= y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] >= yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill >= y[yi] + yi += 1 + + return out, out_index, xfill >= yfill + + +cpdef sparse_ge_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_ge_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_ge_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_ge_int64(ndarray[int64_t, ndim=1] x, + ndarray[int64_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[uint8_t, ndim=1] out + + out = np.empty(len(x), dtype=np.uint8) + + for i in 
range(len(x)): + out[i] = x[i] >= y[i] + return out + + +cpdef sparse_fill_ge_int64(int64_t xfill, + int64_t yfill): + return xfill >= yfill diff --git a/pandas/src/sparse_op_helper.pxi.in b/pandas/src/sparse_op_helper.pxi.in new file mode 100644 index 0000000000000..73fd5e46f46a6 --- /dev/null +++ b/pandas/src/sparse_op_helper.pxi.in @@ -0,0 +1,337 @@ +""" +Template for each `dtype` helper function for sparse ops + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +#---------------------------------------------------------------------- +# Sparse op +#---------------------------------------------------------------------- + +{{py: + +# dtype, float_group +dtypes = [('float64', True), ('int64', False)] + +}} + +{{for dtype, float_group in dtypes}} + +{{if float_group}} + +cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + +cdef inline {{dtype}}_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + return __div_{{dtype}}(a, b) + +cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + if b == 0: + # numpy >= 1.11 returns NaN + # for a // 0, rather than +-inf + if _np_version_under1p11: + if a > 0: + return INF + elif a < 0: + return -INF + return NaN + else: + return a // b + +cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + if b == 0: + return NaN + else: + return a % b + +{{else}} + +cdef inline float64_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + +cdef inline float64_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + return __div_{{dtype}}(a, b) + +cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): + if b == 0: + return 0 + else: + return a // b + +cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, 
{{dtype}}_t b): + if b == 0: + return 0 + else: + return a % b + +{{endif}} + +{{endfor}} + +#---------------------------------------------------------------------- +# sparse array op +#---------------------------------------------------------------------- + +{{py: + +# dtype +dtypes = ['float64', 'int64'] + +def get_op(tup): + assert isinstance(tup, tuple) + assert len(tup) == 4 + + opname, lval, rval, dtype = tup + + ops_dict = {'add': '{0} + {1}', + 'sub': '{0} - {1}', + 'mul': '{0} * {1}', + 'div': '__div_{2}({0}, {1})', + 'mod': '__mod_{2}({0}, {1})', + 'truediv': '__truediv_{2}({0}, {1})', + 'floordiv': '__floordiv_{2}({0}, {1})', + 'pow': '{0} ** {1}', + 'eq': '{0} == {1}', + 'ne': '{0} != {1}', + 'lt': '{0} < {1}', + 'gt': '{0} > {1}', + 'le': '{0} <= {1}', + 'ge': '{0} >= {1}'} + + return ops_dict[opname].format(lval, rval, dtype) + + +def get_dispatch(dtypes): + + ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv', + 'floordiv', 'pow', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'] + + for opname in ops_list: + for dtype in dtypes: + + if opname in ('div', 'truediv'): + rdtype = 'float64' + elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + rdtype = 'uint8' + else: + rdtype = dtype + + yield opname, dtype, rdtype + +}} + + +{{for opname, dtype, rdtype in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_, + BlockIndex xindex, + {{dtype}}_t xfill, + ndarray y_, + BlockIndex yindex, + {{dtype}}_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + Py_ssize_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[{{dtype}}_t, ndim=1] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = 
np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex, + {{dtype}}_t xfill, + ndarray y_, IntIndex yindex, + {{dtype}}_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[{{dtype}}_t, ndim=1] x, y + 
ndarray[{{rdtype}}_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +cpdef sparse_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x, + SparseIndex xindex, {{dtype}}_t xfill, + ndarray[{{dtype}}_t, ndim=1] y, + SparseIndex yindex, {{dtype}}_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_{{opname}}_{{dtype}}(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_{{opname}}_{{dtype}}(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_align_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x, + ndarray[{{dtype}}_t, ndim=1] y): + """ to return NumPy compat result """ + cdef: + Py_ssize_t i = 0 + ndarray[{{rdtype}}_t, ndim=1] out + + out = np.empty(len(x), 
dtype=np.{{rdtype}}) + + for i in range(len(x)): + out[i] = {{(opname, 'x[i]', 'y[i]', dtype) | get_op}} + return out + + +cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, + {{dtype}}_t yfill): + return {{(opname, 'xfill', 'yfill', dtype) | get_op}} + +{{endfor}} diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index be7a0eccf6b7c..440e433ffd95c 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -54,8 +54,13 @@ def test_subclass_sparse_slice(self): def test_subclass_sparse_addition(self): s1 = tm.SubclassedSparseSeries([1, 3, 5]) s2 = tm.SubclassedSparseSeries([-2, 5, 12]) - tm.assert_sp_series_equal(s1 + s2, - tm.SubclassedSparseSeries([-1.0, 8.0, 17.0])) + exp = tm.SubclassedSparseSeries([-1, 8, 17]) + tm.assert_sp_series_equal(s1 + s2, exp) + + s1 = tm.SubclassedSparseSeries([4.0, 5.0, 6.0]) + s2 = tm.SubclassedSparseSeries([1.0, 2.0, 3.0]) + exp = tm.SubclassedSparseSeries([5., 7., 9.]) + tm.assert_sp_series_equal(s1 + s2, exp) def test_subclass_sparse_to_frame(self): s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx') diff --git a/setup.py b/setup.py index e81cae633427d..7ef907aada6dc 100755 --- a/setup.py +++ b/setup.py @@ -108,7 +108,8 @@ def is_platform_mac(): _pxipath = pjoin('pandas', 'src') _pxifiles = ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in', 'algos_join_helper.pxi.in', 'algos_take_helper.pxi.in', - 'hashtable_class_helper.pxi.in', 'hashtable_func_helper.pxi.in'] + 'hashtable_class_helper.pxi.in', 'hashtable_func_helper.pxi.in', + 'sparse_op_helper.pxi.in'] class build_ext(_build_ext):