Source code for bw_hestia_bridge.hestia_api.querying

import re
from typing import Any, Literal, Optional, Union

from ..utils import get_config
from .base_api import hestia_request, nested_elements, valid_types


def search_hestia(
    query: Union[str, dict[str, str]],
    node_type: Optional[str] = None,
    fields: Optional[list[str]] = None,
    limit: Optional[int] = 10,
    how: Literal["or", "and", "exact"] = "or",
    staging: Optional[bool] = None,
) -> list[dict[str, str]]:
    """
    Search the Hestia database.

    Parameters
    ----------
    query : str or dict
        A string to match to names in the Hestia database, or a dict of the
        form ``{"field_name": value}`` to match `field_name` instead of
        "name". See the examples below to see how to make more complex
        queries.
    node_type : str, optional (default: any type)
        A valid type among "actor", "animal", "bibliography", "completeness",
        "cycle", "emission", "impactassessment", "indicator",
        "infrastructure", "input", "management", "measurement",
        "organisation", "practice", "product", "property", "site", "source",
        "term", "transformation", or "transport".
    fields : list[str], optional (default: ["@type", "name", "@id"])
        Fields that will be returned in the search results.
    limit : int, optional (default: 10)
        The maximum number of results that will be returned (best match come
        first).
    how : {"or", "and", "exact"}, optional (default: "or")
        Whether the search tries to match any word in `query` ("or"), all
        words in `query` ("and") or to match the whole query exactly
        ("exact").
    staging : bool, optional (default: from configuration)
        Whether to use the staging API. An explicit ``True`` or ``False``
        takes precedence over the configuration; only ``None`` falls back to
        the "use_staging" configuration entry.

    Returns
    -------
    res : list[dict]
        A list of dicts containing the `fields` entries.
        Additionally, a "_score" value is returned, indicating the accuracy
        of the match found in the Hestia database (results are sorted by
        decreasing "_score").

    Raises
    ------
    ValueError : if `how` is not one of "or", "and", or "exact".
    AssertionError : if `node_type` is not among the valid types.

    Examples
    --------
    One can refine the query by searching for nodes that have a product with
    a name matching "Saplings" by using ::

        search_hestia({"products.term.name": "Saplings"})

    It is also possible to do multi-criteria searches as follow ::

        search_hestia({"name": "Ouidah", "products.term.name": "Saplings"})
    """
    fields = fields or ["@type", "name", "@id"]
    how = how or "or"

    # Validate once, up front, so an invalid value is reported even when
    # `query` is an empty dict.
    if how not in ("and", "or", "exact"):
        raise ValueError(f"Invalid `how` argument: '{how}'.")

    # Only fall back to the configuration when the caller gave no value:
    # `staging or ...` would silently override an explicit ``False``.
    if staging is None:
        staging = get_config("use_staging")

    if not isinstance(query, dict):
        query = {"name": query}

    # The leading segment of a dotted key ("products.term.name" ->
    # "products") is the candidate path for a nested query.
    dotted_key = re.compile(r"^(?P<path>\w+)\..+")

    matches: list[dict] = []

    for key, value in query.items():
        if how == "exact":
            criterion = {f"{key}.keyword": value}
        else:
            criterion = {key: {"query": value, "operator": how}}

        clause: dict[str, Any] = {"match": criterion}

        key_match = dotted_key.search(key)

        if key_match and key_match.group("path") in nested_elements:
            # Keys under a nested element are wrapped in a "nested" clause.
            clause = {
                "nested": {"path": key_match.group("path"), "query": clause}
            }

        matches.append(clause)

    if node_type:
        # NOTE(review): `assert` is stripped under ``python -O``; it is kept
        # so that callers relying on AssertionError are unaffected.
        assert (
            node_type.lower() in valid_types
        ), f"Valid `node_type` entries are {valid_types}"

        matches.append({"match": {"@type": node_type[0].upper() + node_type[1:]}})

    q: dict[str, Any] = {
        "fields": fields,
        "limit": limit,
        "query": {"bool": {"must": matches}},
    }

    res = hestia_request("search", staging, query=q, req_type="post")

    return res.get("results", [])
def get_hestia_node(
    node_id: Union[str, dict[str, str]],
    node_type: Optional[str] = None,
    data_state: Optional[str] = None,
    staging: Optional[bool] = None,
) -> dict:
    """
    Download the Hestia node associated to `node_id`.

    Parameters
    ----------
    node_id : str or dict[str, str]
        Hestia ID for the node or dictionary describing the node (e.g.
        returned from :func:`search_hestia`). If it's a dict, it must
        contain at least an "@type" and an "@id" entry.
    node_type : str, optional (default: try to autodetect)
        A valid type among "actor", "animal", "bibliography", "completeness",
        "cycle", "emission", "impactassessment", "indicator",
        "infrastructure", "input", "management", "measurement",
        "organisation", "practice", "product", "property", "site", "source",
        "term", "transformation", or "transport".
        If `node_id` is a dict, its "@type" entry is used and this argument
        is ignored. Otherwise, if not provided, the type is looked up via
        :func:`get_node_type`.
    data_state : str, optional (default: "recalculated")
        Version of the data, by default, use "recalculated" to download the
        more detailed version of the data. Use "original" to get the raw
        data. Only used when the node is a cycle.
    staging : bool, optional (default: from configuration)
        Whether to use the staging API. An explicit ``True`` or ``False``
        takes precedence over the configuration; only ``None`` falls back to
        the "use_staging" configuration entry.

    Returns
    -------
    node : dict
        The dict associated to the JSON-LD entry describing the node in the
        Hestia database.

    Raises
    ------
    AssertionError : if `node_id` is a dict lacking "@type" or "@id".
    """
    # Only fall back to the configuration when the caller gave no value:
    # `staging or ...` would silently override an explicit ``False``.
    if staging is None:
        staging = get_config("use_staging")

    if isinstance(node_id, dict):
        assert "@type" in node_id, "`node` must contain an '@type' entry."
        assert "@id" in node_id, "`node` must contain an '@id' entry."

        node_type = node_id["@type"]
        node_id = node_id["@id"]
    else:
        # Look the type up on the same API (staging or stable) that the node
        # will be downloaded from.
        node_type = node_type or get_node_type(node_id, staging=staging)

    node_type = node_type.lower()

    if node_type == "cycle":
        data_state = data_state or "recalculated"

        return hestia_request(
            f"{node_type}s/{node_id}?dataState={data_state}", staging
        )

    return hestia_request(f"{node_type}s/{node_id}", staging)
def get_node_type(node_id: str, staging: Optional[bool] = None) -> str:
    """
    Get the node type from its Hestia ID.

    Parameters
    ----------
    node_id : str
        Hestia ID for the node.
    staging : bool, optional (default: from configuration)
        Whether to use the staging API. An explicit ``True`` or ``False``
        takes precedence over the configuration; only ``None`` falls back to
        the "use_staging" configuration entry.

    Returns
    -------
    node_type : str
        The type of the node, in lowercase.

    Raises
    ------
    ValueError : if `node_id` is not found.
    """
    # Only fall back to the configuration when the caller gave no value:
    # `staging or ...` would silently override an explicit ``False``.
    if staging is None:
        staging = get_config("use_staging")

    res = search_hestia({"@id": node_id}, how="exact", staging=staging)

    if res:
        # Results come back best match first; take the type of the first one.
        return res[0]["@type"].lower()

    api_type = "staging" if staging else "stable"

    raise ValueError(f"The {api_type} API found no node with ID {node_id}.")