Source code for bw2data.backends.typos
import warnings
from functools import partial
from typing import Iterable
from bw2data.configuration import typo_settings
try:
from rapidfuzz.distance import DamerauLevenshtein
[docs]
damerau_levenshtein = DamerauLevenshtein.distance
except ImportError:
# Can happen on Windows, see
# https://github.com/rapidfuzz/RapidFuzz/tree/main?tab=readme-ov-file#with-pip
# Rapidfuzz is not currently available on Emscripten
# https://github.com/brightway-lca/brightway-live/issues/59
from bw2data.string_distance import damerau_levenshtein
[docs]
def _check_type(type_value: str, kind: str, valid: Iterable[str]) -> None:
    """
    Warn when `type_value` looks like a misspelling of a known type.

    Nothing happens when `type_value` is falsy, is not a string, or is already
    present in `valid`. Otherwise the Damerau-Levenshtein distance from
    `type_value` to every entry of `valid` is computed, and a warning naming the
    closest entry is emitted if that distance is at most 2.

    Parameters
    ----------
    type_value : str
        The type value to be checked. Non-string values are ignored.
    kind : str
        The category of the type being checked (e.g., 'activity', 'exchange').
        It is only used in the warning text.
    valid : Iterable[str]
        The valid type values. It is used for a membership test and then
        iterated, so it should be re-iterable (e.g. a list, tuple, or set rather
        than a generator).

    Warns
    -----
    UserWarning
        If `type_value` is not in `valid` but is within distance 2 of a valid
        value.

    Examples
    --------
    ``_check_type("actvty", "activity", ["activity", "process"])`` emits the
    warning ``Possible typo found: Given activity type `actvty` but `activity`
    is more common``.
    """
    # Test the type before the membership check: an unhashable value (e.g. a list)
    # would otherwise raise ``TypeError`` in ``in`` when `valid` is a set.
    if not type_value or not isinstance(type_value, str) or type_value in valid:
        return

    # ``min`` returns the first entry with the smallest distance, which is the
    # same entry a stable sort by distance would place first, without sorting
    # every candidate. ``default=None`` covers an empty `valid`.
    closest = min(
        ((damerau_levenshtein(type_value, candidate), candidate) for candidate in valid),
        key=lambda pair: pair[0],
        default=None,
    )
    if closest is not None and closest[0] <= 2:
        warnings.warn(
            f"Possible typo found: Given {kind} type `{type_value}` but "
            f"`{closest[1]}` is more common",
            UserWarning,
        )
[docs]
def _check_keys(obj: dict, kind: str, valid: Iterable[str]) -> None:
    """
    Look for keys of `obj` that are probably misspelled versions of known keys.

    Every string key of `obj` that is not in `valid` is compared to each entry
    of `valid` using the Damerau-Levenshtein distance. A warning is emitted for
    the key when the closest valid entry has a distance below 2 and is at least
    as long as the key itself.

    Parameters
    ----------
    obj : dict
        The dictionary whose keys are to be checked. Non-string keys are skipped.
    kind : str
        The category of the keys being checked (e.g., 'activity', 'exchange').
        It is only used in the warning text.
    valid : Iterable[str]
        An iterable of valid key values.

    Warns
    -----
    UserWarning
        If a key in `obj` is not in `valid` but is close to a valid key.

    Examples
    --------
    ``_check_keys({"nam": "value"}, "activity", ["name", "unit"])`` emits the
    warning ``Possible incorrect activity key found: Given `nam` but `name` is
    more common``.
    """
    for name in obj:
        # Known keys and non-string keys are never reported.
        if name in valid or not isinstance(name, str):
            continue

        # First entry of `valid` with the smallest distance to this key; None
        # when `valid` yields nothing.
        nearest = min(
            ((damerau_levenshtein(name, candidate), candidate) for candidate in valid),
            key=lambda pair: pair[0],
            default=None,
        )
        if nearest is None:
            continue

        distance, suggestion = nearest
        # Report only near matches where the suggestion is not shorter than the
        # key that was given.
        if distance < 2 and len(suggestion) >= len(name):
            warnings.warn(
                f"Possible incorrect {kind} key found: Given `{name}` but "
                f"`{suggestion}` is more common"
            )
[docs]
# `_check_type` pre-bound for node types: call it with just the type value. The value
# is compared against `typo_settings.node_types`, and any warning is worded with the
# kind "activity".
check_activity_type = partial(_check_type, valid=typo_settings.node_types, kind="activity")
[docs]
# `_check_type` pre-bound for edge types: call it with just the type value. The value
# is compared against `typo_settings.edge_types`, and any warning is worded with the
# kind "exchange".
check_exchange_type = partial(_check_type, valid=typo_settings.edge_types, kind="exchange")
[docs]
# `_check_keys` pre-bound for node data: call it with just the dictionary. Its keys
# are compared against `typo_settings.node_keys`, and any warning is worded with the
# kind "activity".
check_activity_keys = partial(_check_keys, valid=typo_settings.node_keys, kind="activity")
[docs]
# `_check_keys` pre-bound for edge data: call it with just the dictionary. Its keys
# are compared against `typo_settings.edge_keys`, and any warning is worded with the
# kind "exchange".
check_exchange_keys = partial(_check_keys, valid=typo_settings.edge_keys, kind="exchange")