Wrap filter1d (#1512)

willschlitzer · weiji14 · michaelgrund · web-flow · commit 2a3e2bc86d8d · 2022-05-05T19:49:33.000+08:00
Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com>
Co-authored-by: Michael Grund <23025878+michaelgrund@users.noreply.github.com>
Co-authored-by: Dongdong Tian <seisman.info@gmail.com>
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -117,6 +117,7 @@ Operations on tabular data
     blockmean
     blockmedian
     blockmode
+    filter1d
     nearneighbor
     project
     select
diff --git a/pygmt/__init__.py b/pygmt/__init__.py
@@ -34,6 +34,7 @@
     blockmode,
     config,
     dimfilter,
+    filter1d,
     grd2cpt,
     grd2xyz,
     grdclip,
diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py
@@ -10,6 +10,7 @@
 from pygmt.src.config import config
 from pygmt.src.contour import contour
 from pygmt.src.dimfilter import dimfilter
+from pygmt.src.filter1d import filter1d
 from pygmt.src.grd2cpt import grd2cpt
 from pygmt.src.grd2xyz import grd2xyz
 from pygmt.src.grdclip import grdclip
diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py
@@ -0,0 +1,145 @@
+"""
+filter1d - Time domain filtering of 1-D data tables
+"""
+import warnings
+
+import pandas as pd
+from pygmt.clib import Session
+from pygmt.exceptions import GMTInvalidInput
+from pygmt.helpers import GMTTempFile, build_arg_string, fmt_docstring, use_alias
+
+
+@fmt_docstring
+@use_alias(
+    E="end",
+    F="filter_type",
+    N="time_col",
+)
+def filter1d(data, output_type="pandas", outfile=None, **kwargs):
+    r"""
+    Time domain filtering of 1-D data tables.
+
+    A general time domain filter for multiple column time
+    series data. The user specifies which column is the time (i.e., the
+    independent variable) via ``time_col``. The fastest operation
+    occurs when the input time series are equally spaced and have no gaps or
+    outliers and the special options are not needed.
+    Read a table and output as a :class:`numpy.ndarray`,
+    :class:`pandas.DataFrame`, or ASCII file.
+
+    Full option list at :gmt-docs:`filter1d.html`
+
+    {aliases}
+
+    Parameters
+    ----------
+    filter_type : str
+        **type**\ *width*\ [**+h**].
+        Sets the filter **type**. Choose among convolution and non-convolution
+        filters. Append the filter code followed by the full filter
+        *width* in same units as time column. By default, this
+        performs a low-pass filtering; append **+h** to select high-pass
+        filtering. Some filters allow for optional arguments and a modifier.
+
+        Available convolution filter types are:
+
+        - (**b**) Boxcar: All weights are equal.
+        - (**c**) Cosine Arch: Weights follow a cosine arch curve.
+        - (**g**) Gaussian: Weights are given by the Gaussian function.
+        - (**f**) Custom: Instead of *width* give name of a one-column file
+          with your own weight coefficients.
+
+        Non-convolution filter types are:
+
+        - (**m**) Median: Returns median value.
+        - (**p**) Maximum likelihood probability (a mode estimator): Return
+          modal value. If more than one mode is found we return their average
+          value. Append **+l** or **+u** if you rather want
+          to return the lowermost or uppermost of the modal values.
+        - (**l**) Lower: Return the minimum of all values.
+        - (**L**) Lower: Return minimum of all positive values only.
+        - (**u**) Upper: Return maximum of all values.
+        - (**U**) Upper: Return maximum of all negative values only.
+
+        Upper case type **B**, **C**, **G**, **M**, **P**, **F** will use
+        robust filter versions: i.e., replace outliers (2.5 L1 scale off
+        median, using 1.4826 \* median absolute deviation [MAD]) with median
+        during filtering.
+
+        In the case of **L**\|\ **U** it is possible that no data passes
+        the initial sign test; in that case the filter will return 0.0.
+        Apart from custom coefficients (**f**), the other filters may accept
+        variable filter widths by passing *width* as a two-column time-series
+        file with filter widths in the second column.  The filter-width file
+        does not need to be co-registered with the data as we obtain the
+        required filter width at each output location via interpolation.  For
+        multi-segment data files the filter file must either have the same
+        number of segments or just a single segment to be used for all data
+        segments.
+
+    end : bool
+        Include ends of time series in output. The default [False] loses
+        half the filter-width of data at each end.
+
+    time_col : int
+        Indicates which column contains the independent variable (time). The
+        left-most column is 0, while the right-most is (*n_cols* - 1)
+        [Default is 0].
+
+    output_type : str
+        Determine the format the output data will be returned in [Default is
+        ``pandas``]:
+
+            - ``numpy`` - :class:`numpy.ndarray`
+            - ``pandas`` - :class:`pandas.DataFrame`
+            - ``file`` - ASCII file (requires ``outfile``)
+    outfile : str
+        The file name for the output ASCII file.
+
+    Returns
+    -------
+    ret : pandas.DataFrame or numpy.ndarray or None
+        Return type depends on ``outfile`` and ``output_type``:
+
+        - None if ``outfile`` is set (output will be stored in file set by
+          ``outfile``)
+        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is
+          not set (depends on ``output_type`` [Default is
+          :class:`pandas.DataFrame`])
+
+    """
+    if kwargs.get("F") is None:
+        raise GMTInvalidInput("Pass a required argument to 'filter_type'.")
+    if output_type not in ["numpy", "pandas", "file"]:
+        raise GMTInvalidInput("Must specify format as either numpy, pandas, or file.")
+    if outfile is not None and output_type != "file":
+        msg = (
+            f"Changing `output_type` of filter1d from '{output_type}' to 'file' "
+            "since `outfile` parameter is set. Please use `output_type='file'` "
+            "to silence this warning."
+        )
+        warnings.warn(msg, category=RuntimeWarning, stacklevel=2)
+        output_type = "file"
+    elif output_type == "file" and outfile is None:
+        raise GMTInvalidInput("Must specify outfile for ASCII output.")
+
+    with GMTTempFile() as tmpfile:
+        with Session() as lib:
+            file_context = lib.virtualfile_from_data(check_kind="vector", data=data)
+            with file_context as infile:
+                if outfile is None:
+                    outfile = tmpfile.name
+                lib.call_module(
+                    module="filter1d",
+                    args=build_arg_string(kwargs, infile=infile, outfile=outfile),
+                )
+
+        # Read tab-separated output. TODO confirm: first data row is used as header
+        if outfile == tmpfile.name:  # if user did not set outfile, return pd.DataFrame
+            result = pd.read_csv(tmpfile.name, sep="\t", comment=">")
+        elif outfile != tmpfile.name:  # return None if outfile set, output in outfile
+            result = None
+
+        if output_type == "numpy":
+            result = result.to_numpy()
+    return result
diff --git a/pygmt/tests/test_filter1d.py b/pygmt/tests/test_filter1d.py
@@ -0,0 +1,96 @@
+"""
+Tests for filter1d.
+"""
+
+import os
+
+import numpy as np
+import pandas as pd
+import pytest
+from pygmt import filter1d
+from pygmt.exceptions import GMTInvalidInput
+from pygmt.helpers import GMTTempFile
+from pygmt.src import which
+
+
+@pytest.fixture(scope="module", name="data")
+def fixture_table():
+    """
+    Load the table data from the sample MaunaLoa_CO2 file.
+    """
+    fname = which("@MaunaLoa_CO2.txt", download="c")
+    data = pd.read_csv(
+        fname, header=None, skiprows=1, sep=r"\s+", names=["date", "co2_ppm"]
+    )
+    return data
+
+
+def test_filter1d_no_outfile(data):
+    """
+    Test filter1d with no outfile set.
+    """
+    result = filter1d(data=data, filter_type="g5")
+    assert result.shape == (670, 2)
+
+
+def test_filter1d_file_output(data):
+    """
+    Test that filter1d returns a file output when it is specified.
+    """
+    with GMTTempFile(suffix=".txt") as tmpfile:
+        result = filter1d(
+            data=data, filter_type="g5", outfile=tmpfile.name, output_type="file"
+        )
+        assert result is None  # return value is None
+        assert os.path.exists(path=tmpfile.name)  # check that outfile exists
+
+
+def test_filter1d_invalid_format(data):
+    """
+    Test that filter1d fails with an incorrect format for output_type.
+    """
+    with pytest.raises(GMTInvalidInput):
+        filter1d(data=data, filter_type="g5", output_type="a")
+
+
+def test_filter1d_no_filter(data):
+    """
+    Test that filter1d fails when the filter_type argument is missing.
+    """
+    with pytest.raises(GMTInvalidInput):
+        filter1d(data=data)
+
+
+def test_filter1d_no_outfile_specified(data):
+    """
+    Test that filter1d fails when output_type is set to 'file' but no output
+    file name is specified.
+    """
+    with pytest.raises(GMTInvalidInput):
+        filter1d(data=data, filter_type="g5", output_type="file")
+
+
+def test_filter1d_outfile_incorrect_output_type(data):
+    """
+    Test that filter1d raises a warning when an outfile filename is set but the
+    output_type is not set to 'file'.
+    """
+    with pytest.warns(RuntimeWarning):
+        with GMTTempFile(suffix=".txt") as tmpfile:
+            result = filter1d(
+                data=data, filter_type="g5", outfile=tmpfile.name, output_type="numpy"
+            )
+            assert result is None  # return value is None
+            assert os.path.exists(path=tmpfile.name)  # check that outfile exists
+
+
+def test_filter1d_format(data):
+    """
+    Test that correct formats are returned.
+    """
+    time_series_default = filter1d(data=data, filter_type="g5")
+    assert isinstance(time_series_default, pd.DataFrame)
+    time_series_array = filter1d(data=data, filter_type="g5", output_type="numpy")
+    assert isinstance(time_series_array, np.ndarray)
+    time_series_df = filter1d(data=data, filter_type="g5", output_type="pandas")
+    assert isinstance(time_series_df, pd.DataFrame)