Source code for bw2io.utils

import hashlib
import json
import math
import os
import pprint
from numbers import Number

from stats_arrays import (
    LognormalUncertainty,
    NormalUncertainty,
    NoUncertainty,
    TriangularUncertainty,
    UndefinedUncertainty,
    UniformUncertainty,
)

from .errors import UnsupportedExchange

DEFAULT_FIELDS = ("name", "categories", "unit", "reference product", "location")

def activity_hash(data, fields=None, case_insensitive=True):
    """
    Hash an activity dataset.

    Used to import data formats like ecospold 1 (ecoinvent v1-2) and SimaPro,
    where no unique attributes for datasets are given.

    This is clearly an imperfect and brittle solution, but there is no other
    obvious approach at this time.

    By default, uses the following, in order:

    * name
    * categories
    * unit
    * reference product
    * location

    Parameters
    ----------
    data : dict
        The :ref:`activity dataset data <database-documents>`.
    fields : list, optional
        Optional list of fields to hash together. Default is
        ``('name', 'categories', 'unit', 'reference product', 'location')``.
        An empty string is used if a field isn't present. All fields are cast
        to lower case.
    case_insensitive : bool, optional
        Cast everything to lowercase before computing hash. Default is ``True``.

    Returns
    -------
    str
        An MD5 hash string, hex-encoded.
    """
    lower = lambda x: x.lower() if case_insensitive else x

    def get_value(obj, field):
        if isinstance(obj.get(field), (list, tuple)):
            return lower("".join(obj.get(field) or []))
        else:
            return lower(obj.get(field) or "")

    fields = fields or DEFAULT_FIELDS
    string = "".join([get_value(data, field) for field in fields])
    return str(hashlib.md5(string.encode("utf-8")).hexdigest())
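
# Illustrative usage (editor's sketch, not part of the original module): the
# field values below are hypothetical. Missing default fields contribute an
# empty string, and with ``case_insensitive=True`` the hash ignores casing,
# so these two datasets collapse to the same digest.
#
#   >>> activity_hash({"name": "Steel", "unit": "kg"}) == activity_hash(
#   ...     {"name": "steel", "unit": "KG"}
#   ... )
#   True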

def es2_activity_hash(activity, flow):
    """
    Generate a unique ID for an ecoinvent 3 dataset.

    Despite using a million UUIDs, there is actually no unique ID in an
    ecospold2 dataset. Datasets are uniquely identified by the combination
    of activity and flow UUIDs.

    Parameters
    ----------
    activity : str
        The activity UUID.
    flow : str
        The flow UUID.

    Returns
    -------
    str
        The unique ID.
    """
    return str(hashlib.md5((activity + flow).encode("utf-8")).hexdigest())
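
# Illustrative usage (editor's sketch, not part of the original module): the
# UUIDs below are made up. The same (activity, flow) pair always maps to the
# same digest, so the result can serve as a stable dataset key.
#
#   >>> a = "759fac54-b912-4781-9833-0ddd6e8cda24"
#   >>> f = "66c93e71-f32b-4591-901c-55395db5c132"
#   >>> es2_activity_hash(a, f) == hashlib.md5((a + f).encode("utf-8")).hexdigest()
#   True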

def load_json_data_file(filename):
    DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
    if filename[-5:] != ".json":
        filename = filename + ".json"
    return json.load(open(os.path.join(DATA_DIR, filename), encoding="utf-8"))

def format_for_logging(obj):
    return pprint.pformat(obj, indent=2)

def rescale_exchange(exc: dict, factor: float) -> dict:
    """
    Rescale exchanges, including formulas and uncertainty values, by a constant factor.

    Parameters
    ----------
    exc : dict
        The exchange to rescale.
    factor : float
        The factor to rescale by.

    Returns
    -------
    dict
        The rescaled exchange.

    Raises
    ------
    ValueError
        If factor is not a number.
    """
    if not isinstance(factor, Number) or factor is True or factor is False:
        raise ValueError(f"`factor` must be a number, but got {type(factor)}")

    if factor == 0:
        exc.update(
            {
                "uncertainty type": UndefinedUncertainty.id,
                "loc": exc["amount"] * factor,
                "amount": exc["amount"] * factor,
            }
        )
        for field in ("scale", "shape", "minimum", "maximum", "negative"):
            if field in exc:
                del exc[field]

    if exc.get("formula"):
        exc["formula"] = "({}) * {}".format(exc["formula"], factor)

    if exc.get("uncertainty type", 0) in (UndefinedUncertainty.id, NoUncertainty.id):
        exc["amount"] = exc["loc"] = factor * exc["amount"]
    elif exc["uncertainty type"] == NormalUncertainty.id:
        exc.update(
            {
                "scale": abs(exc["scale"] * factor),
                "loc": exc["amount"] * factor,
                "amount": exc["amount"] * factor,
            }
        )
    elif exc["uncertainty type"] == LognormalUncertainty.id:
        exc.update(
            {
                "loc": math.log(abs(exc["amount"] * factor)),
                "negative": (exc["amount"] * factor) < 0,
                "amount": exc["amount"] * factor,
            }
        )
    elif exc["uncertainty type"] == UniformUncertainty.id:
        exc["minimum"] *= factor
        exc["maximum"] *= factor
        if "amount" in exc:
            exc["amount"] = exc["loc"] = factor * exc["amount"]
        else:
            exc["amount"] = exc["loc"] = (exc["minimum"] + exc["maximum"]) / 2
    elif exc["uncertainty type"] == TriangularUncertainty.id:
        exc["minimum"] *= factor
        exc["maximum"] *= factor
        exc["amount"] = exc["loc"] = factor * exc["amount"]
    else:
        raise UnsupportedExchange("This exchange type can't be automatically rescaled")

    # negative flag only used in lognormal but can be incorrect if
    # scale < 0 so best to just delete it
    if exc.get("uncertainty type") != LognormalUncertainty.id and "negative" in exc:
        del exc["negative"]

    if exc.get("uncertainty type") not in (
        TriangularUncertainty.id,
        UniformUncertainty.id,
    ):
        for field in ("minimum", "maximum"):
            if field in exc:
                exc[field] *= factor

    if factor < 0 and "minimum" in exc and "maximum" in exc:
        exc["minimum"], exc["maximum"] = exc["maximum"], exc["minimum"]
    elif factor < 0 and "minimum" in exc:
        exc["maximum"] = exc.pop("minimum")
    elif factor < 0 and "maximum" in exc:
        exc["minimum"] = exc.pop("maximum")

    return exc
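
# Illustrative usage (editor's sketch, not part of the original module): the
# exchange dict is hypothetical. Rescaling a triangular exchange by 2 doubles
# the amount, loc, minimum, and maximum in place.
#
#   >>> exc = {
#   ...     "amount": 4.0,
#   ...     "minimum": 2.0,
#   ...     "maximum": 6.0,
#   ...     "uncertainty type": TriangularUncertainty.id,
#   ... }
#   >>> rescale_exchange(exc, 2)["amount"], exc["minimum"], exc["maximum"]
#   (8.0, 4.0, 12.0)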

def standardize_method_to_len_3(name, padding="--", joiner=","):
    """
    Standardize an LCIA method name to a length 3 tuple.

    Parameters
    ----------
    name : tuple
        The current name.
    padding : str, optional
        The string to use for missing fields. The default is "--".
    joiner : str, optional
        The string to use to join the fields. The default is ",".

    Returns
    -------
    tuple
        The standardized name.
    """
    if len(name) >= 3:
        return tuple(name)[:2] + (joiner.join(name[2:]),)
    else:
        return (tuple(name) + (padding,) * 3)[:3]
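
# Illustrative usage (editor's sketch, not part of the original module): the
# method names are hypothetical. Short names are padded with ``padding``;
# names longer than three elements have their tail joined with ``joiner``.
#
#   >>> standardize_method_to_len_3(("IPCC", "GWP100"))
#   ('IPCC', 'GWP100', '--')
#   >>> standardize_method_to_len_3(("IPCC", "climate change", "GWP", "100a"))
#   ('IPCC', 'climate change', 'GWP,100a')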