Skip to content

Commit 70997ef

Browse files
authored
Suggest the correct name when no key matches in the dataset (#9943)
* Add "did you mean" function * improve error for wrong key in dataset * Prioritize best guess * increase number of valid suggestions to match previous idea * Update dataset.py * Update utils.py * Update whats-new.rst * Update whats-new.rst
1 parent 5761de6 commit 70997ef

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ New Features
5252
~~~~~~~~~~~~
5353
- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`).
5454
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Spencer Clark <https://github.com/spencerkclark>`_.
55+
- Improve the error message raised when a key does not match any of the available variables in a dataset, suggesting close matches when possible. (:pull:`9943`)
56+
By `Jimmy Westling <https://github.com/illviljan>`_.
5557

5658
Breaking changes
5759
~~~~~~~~~~~~~~~~

xarray/core/dataset.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1610,7 +1610,14 @@ def __getitem__(
16101610
try:
16111611
return self._construct_dataarray(key)
16121612
except KeyError as e:
1613-
message = f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"
1613+
message = f"No variable named {key!r}."
1614+
1615+
best_guess = utils.did_you_mean(key, self.variables.keys())
1616+
if best_guess:
1617+
message += f" {best_guess}"
1618+
else:
1619+
message += f" Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"
1620+
16141621
# If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]`
16151622
if isinstance(key, tuple):
16161623
message += f"\nHint: use a list to select multiple variables, for example `ds[{list(key)}]`"

xarray/core/utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from __future__ import annotations
3838

3939
import contextlib
40+
import difflib
4041
import functools
4142
import importlib
4243
import inspect
@@ -114,6 +115,47 @@ def wrapper(*args, **kwargs):
114115
return wrapper
115116

116117

118+
def did_you_mean(
    word: Hashable, possibilities: Iterable[Hashable], *, n: int = 10
) -> str:
    """
    Suggest a few correct words based on a list of possibilities.

    Parameters
    ----------
    word : Hashable
        Word to compare to a list of possibilities.
    possibilities : Iterable of Hashable
        The iterable of Hashable that contains the correct values.
    n : int, default: 10
        Maximum number of suggestions to show.

    Returns
    -------
    str
        A message of the form ``"Did you mean one of (...)?"`` listing the
        closest matches, most similar first, or an empty string when no
        possibility is close enough to ``word``.

    Examples
    --------
    >>> did_you_mean("bluch", ("blech", "gray_r", 1, None, (2, 56)))
    "Did you mean one of ('blech',)?"
    >>> did_you_mean("none", ("blech", "gray_r", 1, None, (2, 56)))
    'Did you mean one of (None,)?'
    >>> did_you_mean("1", (1, "1"))
    "Did you mean one of (1, '1')?"

    See also
    --------
    https://en.wikipedia.org/wiki/String_metric
    """
    # get_close_matches only compares strings, so match on str(value).
    # Several distinct values can share one string form (e.g. 1 and "1");
    # keep all of them, in first-seen order, instead of letting the last one
    # silently replace the others. The inner dict acts as an ordered set.
    candidates: dict[str, dict[Hashable, None]] = {}
    for value in possibilities:
        candidates.setdefault(str(value), {})[value] = None

    close = difflib.get_close_matches(str(word), list(candidates), n=n)
    if not close:
        return ""

    # Map the matched strings back to the original values. Colliding string
    # forms can expand one match into several values, so re-apply the cap.
    best = tuple(value for text in close for value in candidates[text])[:n]
    return f"Did you mean one of {best}?"
157+
158+
117159
def get_valid_numpy_dtype(array: np.ndarray | pd.Index) -> np.dtype:
118160
"""Return a numpy compatible dtype from either
119161
a numpy array or a pandas.Index.

0 commit comments

Comments
 (0)