Skip to content

Commit 2a3e2bc

Browse files
willschlitzer, weiji14, michaelgrund, seisman
authored
Wrap filter1d (#1512)
Co-authored-by: Wei Ji <[email protected]> Co-authored-by: Michael Grund <[email protected]> Co-authored-by: Dongdong Tian <[email protected]>
1 parent d8a7f9e commit 2a3e2bc

File tree

5 files changed

+244
-0
lines changed

5 files changed

+244
-0
lines changed

doc/api/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ Operations on tabular data
117117
blockmean
118118
blockmedian
119119
blockmode
120+
filter1d
120121
nearneighbor
121122
project
122123
select

pygmt/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
blockmode,
3535
config,
3636
dimfilter,
37+
filter1d,
3738
grd2cpt,
3839
grd2xyz,
3940
grdclip,

pygmt/src/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pygmt.src.config import config
1111
from pygmt.src.contour import contour
1212
from pygmt.src.dimfilter import dimfilter
13+
from pygmt.src.filter1d import filter1d
1314
from pygmt.src.grd2cpt import grd2cpt
1415
from pygmt.src.grd2xyz import grd2xyz
1516
from pygmt.src.grdclip import grdclip

pygmt/src/filter1d.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
"""
2+
filter1d - Time domain filtering of 1-D data tables
3+
"""
4+
import warnings
5+
6+
import pandas as pd
7+
from pygmt.clib import Session
8+
from pygmt.exceptions import GMTInvalidInput
9+
from pygmt.helpers import GMTTempFile, build_arg_string, fmt_docstring, use_alias
10+
11+
12+
@fmt_docstring
@use_alias(E="end", F="filter_type", N="time_col")
def filter1d(data, output_type="pandas", outfile=None, **kwargs):
    r"""
    Time domain filtering of 1-D data tables.

    A general time domain filter for multiple column time
    series data. The user specifies which column is the time (i.e., the
    independent variable) via ``time_col``. The fastest operation
    occurs when the input time series are equally spaced and have no gaps or
    outliers and the special options are not needed.
    Read a table and output as a :class:`numpy.ndarray`,
    :class:`pandas.DataFrame`, or ASCII file.

    Full option list at :gmt-docs:`filter1d.html`

    {aliases}

    Parameters
    ----------
    data
        The input data table (for example a :class:`pandas.DataFrame`). It is
        handed to GMT through ``Session.virtualfile_from_data`` with
        ``check_kind="vector"``.

    filter_type : str
        **type**\ *width*\ [**+h**].
        Sets the filter **type**. Choose among convolution and non-convolution
        filters. Append the filter code followed by the full filter
        *width* in same units as time column. By default, this
        performs a low-pass filtering; append **+h** to select high-pass
        filtering. Some filters allow for optional arguments and a modifier.
        This parameter is required.

        Available convolution filter types are:

        - (**b**) Boxcar: All weights are equal.
        - (**c**) Cosine Arch: Weights follow a cosine arch curve.
        - (**g**) Gaussian: Weights are given by the Gaussian function.
        - (**f**) Custom: Instead of *width* give name of a one-column file
          with your own weight coefficients.

        Non-convolution filter types are:

        - (**m**) Median: Returns median value.
        - (**p**) Maximum likelihood probability (a mode estimator): Return
          modal value. If more than one mode is found we return their average
          value. Append **+l** or **+u** if you rather want
          to return the lowermost or uppermost of the modal values.
        - (**l**) Lower: Return the minimum of all values.
        - (**L**) Lower: Return minimum of all positive values only.
        - (**u**) Upper: Return maximum of all values.
        - (**U**) Upper: Return maximum of all negative values only.

        Upper case type **B**, **C**, **G**, **M**, **P**, **F** will use
        robust filter versions: i.e., replace outliers (2.5 L1 scale off
        median, using 1.4826 \* median absolute deviation [MAD]) with median
        during filtering.

        In the case of **L**\|\ **U** it is possible that no data passes
        the initial sign test; in that case the filter will return 0.0.
        Apart from custom coefficients (**f**), the other filters may accept
        variable filter widths by passing *width* as a two-column time-series
        file with filter widths in the second column. The filter-width file
        does not need to be co-registered with the data as we obtain the
        required filter width at each output location via interpolation. For
        multi-segment data files the filter file must either have the same
        number of segments or just a single segment to be used for all data
        segments.

    end : bool
        Include ends of time series in output. The default [False] loses
        half the filter-width of data at each end.

    time_col : int
        Indicates which column contains the independent variable (time). The
        left-most column is 0, while the right-most is (*n_cols* - 1)
        [Default is 0].

    output_type : str
        Determine the format the xyz data will be returned in [Default is
        ``pandas``]:

        - ``numpy`` - :class:`numpy.ndarray`
        - ``pandas`` - :class:`pandas.DataFrame`
        - ``file`` - ASCII file (requires ``outfile``)
    outfile : str
        The file name for the output ASCII file. When it is set together
        with an ``output_type`` other than ``file``, a ``RuntimeWarning`` is
        issued and the output is written to the file anyway.

    Returns
    -------
    ret : pandas.DataFrame or numpy.ndarray or None
        Return type depends on ``outfile`` and ``output_type``:

        - None if ``outfile`` is set (output will be stored in file set by
          ``outfile``)
        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is
          not set (depends on ``output_type`` [Default is
          :class:`pandas.DataFrame`])

    Raises
    ------
    GMTInvalidInput
        If ``filter_type`` is missing, if ``output_type`` is not one of
        ``numpy``, ``pandas`` or ``file``, or if ``output_type`` is ``file``
        but no ``outfile`` is given.
    """
    # Validate the arguments before touching GMT at all.
    if kwargs.get("F") is None:
        raise GMTInvalidInput("Pass a required argument to 'filter_type'.")
    if output_type not in ("numpy", "pandas", "file"):
        raise GMTInvalidInput("Must specify format as either numpy, pandas, or file.")

    if outfile is None:
        if output_type == "file":
            raise GMTInvalidInput("Must specify outfile for ASCII output.")
    elif output_type != "file":
        # An explicit outfile wins over the requested in-memory format.
        msg = (
            f"Changing `output_type` of filter1d from '{output_type}' to 'file' "
            "since `outfile` parameter is set. Please use `output_type='file'` "
            "to silence this warning."
        )
        warnings.warn(msg, category=RuntimeWarning, stacklevel=2)
        output_type = "file"

    with GMTTempFile() as tmpfile:
        # Without a user-supplied destination, GMT writes to the scratch file.
        destination = tmpfile.name if outfile is None else outfile
        with Session() as lib:
            table_context = lib.virtualfile_from_data(check_kind="vector", data=data)
            with table_context as infile:
                arg_str = build_arg_string(kwargs, infile=infile, outfile=destination)
                lib.call_module(module="filter1d", args=arg_str)

        if destination == tmpfile.name:
            # Output went to the scratch file: load it back before the file
            # is cleaned up on leaving the GMTTempFile context.
            # NOTE(review): no ``header``/``names`` argument is passed, so
            # pandas takes the first parsed line as column labels rather than
            # as data - confirm this is intended.
            result = pd.read_csv(tmpfile.name, sep="\t", comment=">")
        else:
            # Output already lives in the user's file; nothing to return.
            result = None

        if output_type == "numpy":
            result = result.to_numpy()
    return result

pygmt/tests/test_filter1d.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""
2+
Tests for filter1d.
3+
"""
4+
5+
import os
6+
7+
import numpy as np
8+
import pandas as pd
9+
import pytest
10+
from pygmt import filter1d
11+
from pygmt.exceptions import GMTInvalidInput
12+
from pygmt.helpers import GMTTempFile
13+
from pygmt.src import which
14+
15+
16+
@pytest.fixture(scope="module", name="data")
def fixture_table():
    """
    Load the MaunaLoa_CO2 sample table into a two-column DataFrame.

    The first line of the file is skipped and the columns are labelled
    ``date`` and ``co2_ppm``.
    """
    co2_path = which("@MaunaLoa_CO2.txt", download="c")
    return pd.read_csv(
        co2_path, header=None, skiprows=1, sep=r"\s+", names=["date", "co2_ppm"]
    )
26+
27+
28+
def test_filter1d_no_outfile(data):
    """
    Check the shape of the table returned by filter1d when no outfile is set.
    """
    expected_shape = (670, 2)
    filtered = filter1d(data=data, filter_type="g5")
    assert filtered.shape == expected_shape
34+
35+
36+
def test_filter1d_file_output(data):
    """
    Test that filter1d writes to the requested file and returns None when
    ``output_type="file"`` is given together with ``outfile``.
    """
    with GMTTempFile(suffix=".txt") as tmpfile:
        result = filter1d(
            data=data, filter_type="g5", outfile=tmpfile.name, output_type="file"
        )
        assert result is None  # return value is None
        assert os.path.exists(path=tmpfile.name)  # check that outfile exists
        # NOTE(review): GMTTempFile appears to create the file up front, so
        # the existence check alone would pass even if filter1d wrote nothing.
        # Requiring a non-empty file shows that output was actually written.
        assert os.path.getsize(tmpfile.name) > 0
46+
47+
48+
def test_filter1d_invalid_format(data):
    """
    Test that filter1d rejects an unsupported value for output_type.
    """
    bad_arguments = dict(data=data, filter_type="g5", output_type="a")
    with pytest.raises(GMTInvalidInput):
        filter1d(**bad_arguments)
54+
55+
56+
def test_filter1d_no_filter(data):
    """
    Test that filter1d fails when the required filter_type argument is
    missing.
    """
    arguments = dict(data=data)
    with pytest.raises(GMTInvalidInput):
        filter1d(**arguments)
62+
63+
64+
def test_filter1d_no_outfile_specified(data):
    """
    Test that filter1d fails when output_type is set to 'file' but no output
    file name is specified.
    """
    arguments = dict(data=data, filter_type="g5", output_type="file")
    with pytest.raises(GMTInvalidInput):
        filter1d(**arguments)
71+
72+
73+
def test_filter1d_outfile_incorrect_output_type(data):
    """
    Test that filter1d raises a warning when an outfile filename is set but the
    output_type is not set to 'file', and that the output still goes to the
    file.
    """
    with pytest.warns(RuntimeWarning):
        with GMTTempFile(suffix=".txt") as tmpfile:
            result = filter1d(
                data=data, filter_type="g5", outfile=tmpfile.name, output_type="numpy"
            )
            assert result is None  # return value is None
            assert os.path.exists(path=tmpfile.name)  # check that outfile exists
            # NOTE(review): GMTTempFile appears to create the file up front,
            # so the existence check alone would pass even if nothing was
            # written. Requiring a non-empty file shows that filter1d really
            # redirected its output to outfile.
            assert os.path.getsize(tmpfile.name) > 0
85+
86+
87+
def test_filter1d_format(data):
    """
    Test that each in-memory output_type yields the matching container type.
    """
    # With no output_type given, a DataFrame is expected.
    default_result = filter1d(data=data, filter_type="g5")
    assert isinstance(default_result, pd.DataFrame)

    # Explicit output types, checked in the same order as before.
    expected_types = (("numpy", np.ndarray), ("pandas", pd.DataFrame))
    for requested_type, container in expected_types:
        filtered = filter1d(data=data, filter_type="g5", output_type=requested_type)
        assert isinstance(filtered, container)

0 commit comments

Comments
 (0)