# Source code for bw2io.extractors.exiobase
import csv
import re
import zipfile
from pathlib import Path
from tqdm import tqdm
# Trailing " (NN)" suffix: one space, then exactly two digits in parentheses,
# anchored at the end of the string. Compiled once because the function is
# called for every header cell and every row label.
_TRAILING_TWO_DIGIT_CODE = re.compile(r" \(\d\d\)$")


def remove_numerics(string: str) -> str:
    """
    Strip a trailing two-digit code in parentheses from a label.

    For example, ``'Tobacco products (16)'`` becomes ``'Tobacco products'``.
    Only a suffix of the exact form ``" (NN)"`` (a space, then two digits in
    parentheses, at the very end of the string) is removed; any other string
    is returned unchanged.

    Parameters
    ----------
    string : str
        The string to be processed.

    Returns
    -------
    str
        The string without the trailing parenthesised two-digit code.
    """
    return _TRAILING_TWO_DIGIT_CODE.sub("", string)
[docs]
class Exiobase3MonetaryDataExtractor(object):
@classmethod
[docs]
def _get_path(cls, dirpath):
"""
Get the directory path of the EXIOBASE data file.
Parameters
----------
dirpath : str
The path of the EXIOBASE data file or directory.
Returns
-------
Path
The directory path of the EXIOBASE data file.
"""
path = Path(dirpath)
if path.is_file() and path.suffix.lower() == ".zip":
zf = zipfile.ZipFile(path)
if zf.namelist()[0].startswith("IOT_"):
root_dir = zf.namelist()[0].split("/")[0]
path = zipfile.Path(zf, root_dir)
else:
path = zipfile.Path(zf)
else:
assert path.is_dir(), "Must supply path to EXIOBASE data folder"
assert (
path / "A.txt"
).is_file(), "Directory path must include Exiobase files"
return path
@classmethod
[docs]
def _get_production_volumes(cls, dirpath):
"""
Extract production volumes from the EXIOBASE data file.
Parameters
----------
dirpath : str
The path of the EXIOBASE data file or directory.
Returns
-------
dict
A dictionary for the production volume.
"""
if not (dirpath / "x.txt").is_file():
return {}
with (dirpath / "x.txt").open() as csvfile:
reader = csv.DictReader(csvfile, delimiter="\t")
data = {
(row["sector"], row["region"]): float(row["indout"]) for row in reader
}
return data
@classmethod
[docs]
def _get_unit_data(cls, dirpath):
"""
Extract unit data from the EXIOBASE data file.
Parameters
----------
dirpath : str
The path to the EXIOBASE data file or directory.
Returns
-------
dict
A dctionary of unit data from the EXIOBase data file.
"""
lookup = {"M.EUR": "million β¬"}
with (dirpath / "unit.txt").open() as csvfile:
reader = csv.DictReader(csvfile, delimiter="\t")
data = {
(row["sector"], row["region"]): lookup[row["unit"]] for row in reader
}
return data
@classmethod
[docs]
def get_flows(cls, dirpath):
"""
Extract flows from an EXIOBASE data file.
Parameters
----------
dirpath : str
The path of the EXIOBASE data file or directory.
Returns
-------
dict
A dictionary of flows from the EXIOBASE data file.
"""
dirpath = cls._get_path(dirpath)
with (dirpath / "satellite" / "unit.txt").open() as csvfile:
reader = csv.reader(csvfile, delimiter="\t")
next(reader)
data = {o[0]: o[1] for o in reader}
return data
@classmethod
[docs]
def get_products(cls, dirpath):
"""
Get product information from a given directory.
Parameters
----------
dirpath : str
The path to the directory with the product information.
Returns
-------
list
A list of dictionaries with the following keys:
- 'name': str
The product name.
- 'location': str
The product location.
- 'unit': str
The product's unit of measure.
- 'production volume': float
The total production volume for the product.
"""
dirpath = cls._get_path(dirpath)
units = cls._get_unit_data(dirpath)
volumes = cls._get_production_volumes(dirpath)
return [
{
"name": key[0],
"location": key[1],
"unit": units[key],
"production volume": volumes.get(key, 0),
}
for key in units
]
@classmethod
[docs]
def get_technosphere_iterator(
cls, dirpath, num_products, ignore_small_balancing_corrections=True
):
"""
Get an iterator in a given directory.
Parameters
----------
dirpath : str
The path to the directory with the data.
num_products : int
The number of products.
ignore_small_balancing_corrections : bool, optional
Ignore small balancing corrections. By default True.
"""
dirpath = cls._get_path(dirpath)
with (dirpath / "A.txt").open() as f:
reader = csv.reader(f, delimiter="\t")
locations = next(reader)[2:]
names = [remove_numerics(o) for o in next(reader)[2:]]
for line in tqdm(reader):
inpt = (remove_numerics(line[1]), line[0])
for index, elem in enumerate(line[2:]):
if elem and float(elem) != 0:
if (
ignore_small_balancing_corrections
and abs(float(elem)) < 1e-15
):
continue
else:
yield (inpt, (names[index], locations[index]), float(elem))
@classmethod
[docs]
def get_biosphere_iterator(cls, dirpath, ignore_small_balancing_corrections=True):
"""
Returns an iterator that yields tuples of flow names, locations, and amounts.
Parameters
----------
dirpath : str
The path to the directory.
ignore_small_balancing_corrections : bool, optional
Ignore small balancing corrections. By default True.
"""
dirpath = cls._get_path(dirpath)
with (dirpath / "satellite" / "S.txt").open() as f:
reader = csv.reader(f, delimiter="\t")
locations = next(reader)[1:]
names = [remove_numerics(o) for o in next(reader)[1:]]
for line in tqdm(reader):
flow = line[0]
for index, elem in enumerate(line[1:]):
if elem and float(elem) != 0:
if (
ignore_small_balancing_corrections
and abs(float(elem)) < 1e-15
):
continue
else:
yield (flow, (names[index], locations[index]), float(elem))