Source code for bw2io.extractors.exiobase

import csv
import re
import zipfile
from pathlib import Path

from tqdm import tqdm


def remove_numerics(string):
    """
    Strip a trailing two-digit code in parentheses from a label.

    Only a suffix of the exact form ``" (NN)"`` (one space, two digits in
    parentheses, at the end of the string) is removed, e.g.
    ``'Tobacco products (16)'`` becomes ``'Tobacco products'``. Parenthesised
    numbers elsewhere in the string, or with a different number of digits,
    are left untouched.

    Parameters
    ----------
    string : str
        The string to be processed.

    Returns
    -------
    str
        The input without the trailing parenthesised two-digit code.
    """
    return re.sub(r" \(\d\d\)$", "", string)
[docs] class Exiobase3MonetaryDataExtractor(object): @classmethod
[docs] def _get_path(cls, dirpath): """ Get the directory path of the EXIOBASE data file. Parameters ---------- dirpath : str The path of the EXIOBASE data file or directory. Returns ------- Path The directory path of the EXIOBASE data file. """ path = Path(dirpath) if path.is_file() and path.suffix.lower() == ".zip": zf = zipfile.ZipFile(path) if zf.namelist()[0].startswith("IOT_"): root_dir = zf.namelist()[0].split("/")[0] path = zipfile.Path(zf, root_dir) else: path = zipfile.Path(zf) else: assert path.is_dir(), "Must supply path to EXIOBASE data folder" assert ( path / "A.txt" ).is_file(), "Directory path must include Exiobase files" return path
@classmethod
[docs] def _get_production_volumes(cls, dirpath): """ Extract production volumes from the EXIOBASE data file. Parameters ---------- dirpath : str The path of the EXIOBASE data file or directory. Returns ------- dict A dictionary for the production volume. """ if not (dirpath / "x.txt").is_file(): return {} with (dirpath / "x.txt").open() as csvfile: reader = csv.DictReader(csvfile, delimiter="\t") data = { (row["sector"], row["region"]): float(row["indout"]) for row in reader } return data
@classmethod
[docs] def _get_unit_data(cls, dirpath): """ Extract unit data from the EXIOBASE data file. Parameters ---------- dirpath : str The path to the EXIOBASE data file or directory. Returns ------- dict A dctionary of unit data from the EXIOBase data file. """ lookup = {"M.EUR": "million €"} with (dirpath / "unit.txt").open() as csvfile: reader = csv.DictReader(csvfile, delimiter="\t") data = { (row["sector"], row["region"]): lookup[row["unit"]] for row in reader } return data
@classmethod
[docs] def get_flows(cls, dirpath): """ Extract flows from an EXIOBASE data file. Parameters ---------- dirpath : str The path of the EXIOBASE data file or directory. Returns ------- dict A dictionary of flows from the EXIOBASE data file. """ dirpath = cls._get_path(dirpath) with (dirpath / "satellite" / "unit.txt").open() as csvfile: reader = csv.reader(csvfile, delimiter="\t") next(reader) data = {o[0]: o[1] for o in reader} return data
@classmethod
[docs] def get_products(cls, dirpath): """ Get product information from a given directory. Parameters ---------- dirpath : str The path to the directory with the product information. Returns ------- list A list of dictionaries with the following keys: - 'name': str The product name. - 'location': str The product location. - 'unit': str The product's unit of measure. - 'production volume': float The total production volume for the product. """ dirpath = cls._get_path(dirpath) units = cls._get_unit_data(dirpath) volumes = cls._get_production_volumes(dirpath) return [ { "name": key[0], "location": key[1], "unit": units[key], "production volume": volumes.get(key, 0), } for key in units ]
@classmethod
[docs] def get_technosphere_iterator( cls, dirpath, num_products, ignore_small_balancing_corrections=True ): """ Get an iterator in a given directory. Parameters ---------- dirpath : str The path to the directory with the data. num_products : int The number of products. ignore_small_balancing_corrections : bool, optional Ignore small balancing corrections. By default True. """ dirpath = cls._get_path(dirpath) with (dirpath / "A.txt").open() as f: reader = csv.reader(f, delimiter="\t") locations = next(reader)[2:] names = [remove_numerics(o) for o in next(reader)[2:]] for line in tqdm(reader): inpt = (remove_numerics(line[1]), line[0]) for index, elem in enumerate(line[2:]): if elem and float(elem) != 0: if ( ignore_small_balancing_corrections and abs(float(elem)) < 1e-15 ): continue else: yield (inpt, (names[index], locations[index]), float(elem))
@classmethod
[docs] def get_biosphere_iterator(cls, dirpath, ignore_small_balancing_corrections=True): """ Returns an iterator that yields tuples of flow names, locations, and amounts. Parameters ---------- dirpath : str The path to the directory. ignore_small_balancing_corrections : bool, optional Ignore small balancing corrections. By default True. """ dirpath = cls._get_path(dirpath) with (dirpath / "satellite" / "S.txt").open() as f: reader = csv.reader(f, delimiter="\t") locations = next(reader)[1:] names = [remove_numerics(o) for o in next(reader)[1:]] for line in tqdm(reader): flow = line[0] for index, elem in enumerate(line[1:]): if elem and float(elem) != 0: if ( ignore_small_balancing_corrections and abs(float(elem)) < 1e-15 ): continue else: yield (flow, (names[index], locations[index]), float(elem))