import collections
import numbers
import os
from pathlib import Path
from typing import List, Optional
import xlsxwriter
from bw2data import Database, projects
from bw_processing import safe_filename
from ..utils import activity_hash
from .csv import CSVFormatter
[docs]
def create_valid_worksheet_name(string):
"""
Exclude invalid characters and names.
Parameters
----------
string : str
String to convert to a valid worksheet name.
Returns
-------
string : str
Valid worksheet name.
Notes
-----
Data from http://www.accountingweb.com/technology/excel/seven-characters-you-cant-use-in-worksheet-names.
"""
excluded = {"\\", "/", "*", "[", "]", ":", "?"}
if string == "History":
return "History-worksheet"
for x in excluded:
string = string.replace(x, "#")
return string[:30]
[docs]
def lci_matrices_to_excel(database_name, include_descendants=True):
"""
Export LCI matrices to Excel.
Parameters
----------
database_name : str
Name of database to export.
include_descendants : bool
Include databases which are linked from ``database_name``. (default True)
Returns
-------
filepath : str
Path to created Excel file.
Examples
--------
>>> lci_matrices_to_excel(database_name='example_db', include_descendants=True)
'/path/to/example_db.xlsx'
"""
from bw2calc import LCA
print("Starting Excel export. This can be slow for large matrices!")
safe_name = safe_filename(database_name, False)
filepath = os.path.join(projects.output_dir, safe_name + ".xlsx")
lca = LCA({Database(database_name).random(): 1})
lca.load_lci_data()
lca.fix_dictionaries()
if not include_descendants:
lca.activity_dict = {
key: value
for key, value in lca.activity_dict.items()
if key[0] == database_name
}
# Drop biosphere flows with zero references
# TODO: This will ignore (-1 + 1 = 0) references
lca.biosphere_dict = {
key: value
for key, value in lca.biosphere_dict.items()
if lca.biosphere_matrix[lca.biosphere_dict[key], :].sum() != 0
}
workbook = xlsxwriter.Workbook(filepath)
bold = workbook.add_format({"bold": True})
print("Sorting objects")
sorted_activity_keys = sorted(
[(Database.get(key).get("name") or "Unknown", key) for key in lca.activity_dict]
)
sorted_product_keys = sorted(
[(Database.get(key).get("name") or "Unknown", key) for key in lca.product_dict]
)
sorted_bio_keys = sorted(
[
(Database.get(key).get("name") or "Unknown", key)
for key in lca.biosphere_dict
]
)
tm_sheet = workbook.add_worksheet("technosphere")
tm_sheet.set_column("A:A", 50)
data = Database(database_name).load()
# Labels
for index, data in enumerate(sorted_activity_keys):
tm_sheet.write_string(0, index + 1, data[0])
for index, data in enumerate(sorted_product_keys):
tm_sheet.write_string(index + 1, 0, data[0])
print("Entering technosphere matrix data")
coo = lca.technosphere_matrix.tocoo()
# Translate row index to sorted product index
act_dict = {obj[1]: idx for idx, obj in enumerate(sorted_activity_keys)}
pro_dict = {obj[1]: idx for idx, obj in enumerate(sorted_product_keys)}
bio_dict = {obj[1]: idx for idx, obj in enumerate(sorted_bio_keys)}
pro_lookup = {v: pro_dict[k] for k, v in lca.product_dict.items()}
bio_lookup = {v: bio_dict[k] for k, v in lca.biosphere_dict.items()}
act_lookup = {v: act_dict[k] for k, v in lca.activity_dict.items()}
# Matrix values
for row, col, value in zip(coo.row, coo.col, coo.data):
tm_sheet.write_number(pro_lookup[row] + 1, act_lookup[col] + 1, value)
bm_sheet = workbook.add_worksheet("biosphere")
bm_sheet.set_column("A:A", 50)
data = Database(database_name).load()
# Labels
for index, data in enumerate(sorted_activity_keys):
bm_sheet.write_string(0, index + 1, data[0])
for index, data in enumerate(sorted_bio_keys):
bm_sheet.write_string(index + 1, 0, data[0])
print("Entering biosphere matrix data")
coo = lca.biosphere_matrix.tocoo()
# Matrix values
for row, col, value in zip(coo.row, coo.col, coo.data):
bm_sheet.write_number(bio_lookup[row] + 1, act_lookup[col] + 1, value)
COLUMNS = (
"Index",
"Name",
"Reference product",
"Unit",
"Categories",
"Location",
)
tech_sheet = workbook.add_worksheet("technosphere-labels")
tech_sheet.set_column("B:B", 60)
tech_sheet.set_column("C:C", 30)
tech_sheet.set_column("D:D", 15)
tech_sheet.set_column("E:E", 30)
print("Writing metadata")
# Header
for index, col in enumerate(COLUMNS):
tech_sheet.write_string(0, index, col, bold)
tech_sheet.write_comment(
"C1",
"Only for ecoinvent 3, where names =/= products.",
)
for index, data in enumerate(sorted_activity_keys):
obj = Database.get(data[1])
tech_sheet.write_number(index + 1, 0, index + 1)
tech_sheet.write_string(index + 1, 1, obj.get("name") or "Unknown")
tech_sheet.write_string(index + 1, 2, obj.get("reference product") or "")
tech_sheet.write_string(index + 1, 3, obj.get("unit") or "Unknown")
tech_sheet.write_string(index + 1, 4, " - ".join(obj.get("categories") or []))
tech_sheet.write_string(index + 1, 5, obj.get("location") or "Unknown")
COLUMNS = (
"Index",
"Name",
"Unit",
"Categories",
)
bio_sheet = workbook.add_worksheet("biosphere-labels")
bio_sheet.set_column("B:B", 60)
bio_sheet.set_column("C:C", 15)
bio_sheet.set_column("D:D", 30)
# Header
for index, col in enumerate(COLUMNS):
bio_sheet.write_string(0, index, col, bold)
for index, data in enumerate(sorted_bio_keys):
obj = Database.get(data[1])
bio_sheet.write_number(index + 1, 0, index + 1)
bio_sheet.write_string(index + 1, 1, obj.get("name") or "Unknown")
bio_sheet.write_string(index + 1, 2, obj.get("unit") or "Unknown")
bio_sheet.write_string(index + 1, 3, " - ".join(obj.get("categories") or []))
workbook.close()
return filepath
[docs]
def write_lci_excel(database_name, objs=None, sections=None, dirpath=None):
"""
Export database `database_name` to an Excel spreadsheet.
Notes
-----
Not all data can be exported. The following constraints apply:
* Nested data, e.g. `{'foo': {'bar': 'baz'}}` are excluded.
* Spreadsheets are not a great format for nested data. However, *tuples* are exported, and the characters `::` are used to join elements of the tuple.
* The only well-supported data types are strings, numbers, and booleans.
Default directory is ``projects.output_dir``, set ``dirpath`` to have save the file somewhere else.
Parameters
----------
database_name : str
Name of the database to export.
objs : list, optional
List of objects to export. If not provided, all objects in the database will be exported.
sections : list, optional
List of sections to export. If not provided, all sections will be exported.
dirpath : str, optional
Directory to save the file to. Default is ``projects.output_dir``.
Returns
-------
str
Filepath of the exported file.
"""
safe_name = safe_filename(database_name, False)
if dirpath is None:
dirpath = projects.output_dir
if not os.path.isdir(dirpath) or not os.access(dirpath, os.W_OK):
raise ValueError(f"Directory path {dirpath} is not a writable directory")
filepath = os.path.join(dirpath, "lci-" + safe_name + ".xlsx")
workbook = xlsxwriter.Workbook(filepath)
bold = workbook.add_format({"bold": True})
bold.set_font_size(12)
highlighted = {
"Activity",
"Database",
"Exchanges",
"Parameters",
"Database parameters",
"Project parameters",
}
frmt = lambda x: bold if row[0] in highlighted else None
sheet = workbook.add_worksheet(create_valid_worksheet_name(database_name))
data = CSVFormatter(database_name, objs).get_formatted_data(sections)
for row_index, row in enumerate(data):
for col_index, value in enumerate(row):
if value is None:
continue
elif isinstance(value, numbers.Number):
sheet.write_number(row_index, col_index, value, frmt(value))
else:
try:
sheet.write_string(row_index, col_index, value, frmt(value))
except TypeError:
pass
workbook.close()
return filepath
[docs]
def write_lci_matching(
db: List[dict],
database_name: str,
only_unlinked: bool = False,
only_activity_names: bool = False,
output_dir: Optional[Path] = None,
):
"""
Write matched and unmatched exchanges to Excel file
Parameters
----------
db : :class:`bw2data.Database`
Database to write.
database_name : str
Name of the database to write.
only_unlinked : bool, optional
Only write unlinked exchanges. Default is ``False``.
only_activity_names : bool, optional
Only write activity names. Default is ``False``.
Returns
-------
str
Filepath of the exported file.
"""
def write_headers(sheet, row):
columns = (
"Name",
"Reference Product",
"Amount",
"Database",
"Unit",
"Categories",
"Location",
"Type",
"Matched",
)
for index, col in enumerate(columns):
sheet.write_string(row, index, col, bold)
def write_row(sheet, row, data, exc=True):
style = highlighted if ("input" not in data and exc) else None
if exc:
sheet.write_string(row, 0, data.get("name") or "(unknown)", style)
sheet.write_string(
row, 1, data.get("reference product") or "(unknown)", style
)
try:
sheet.write_number(row, 2, float(data.get("amount")), style)
except ValueError:
sheet.write_string(row, 2, "Unknown", style)
else:
sheet.write_string(row, 0, data.get("name") or "(unknown)", bold)
sheet.write_string(row, 3, (data.get("input") or [""])[0], style)
sheet.write_string(row, 4, data.get("unit") or "(unknown)", style)
sheet.write_string(
row, 5, ":".join(data.get("categories") or ["(unknown)"]), style
)
sheet.write_string(row, 6, data.get("location") or "(unknown)", style)
if exc:
sheet.write_string(row, 7, data.get("type") or "(unknown)", style)
sheet.write_boolean(row, 8, "input" in data, style)
if only_unlinked and only_activity_names:
raise ValueError(
"Must choose only one of ``only_unlinked`` and ``only_activity_names``"
)
safe_name = safe_filename(database_name, False)
suffix = "-unlinked" if only_unlinked else ("-names" if only_activity_names else "")
filepath = (
Path(output_dir or projects.output_dir)
/ f"db-matching-{safe_name}{suffix}.xlsx"
)
workbook = xlsxwriter.Workbook(filepath)
bold = workbook.add_format({"bold": True})
highlighted = workbook.add_format({"bg_color": "#FFB5B5"})
bold.set_font_size(12)
sheet = workbook.add_worksheet("matching")
sheet.set_column("A:A", 60)
sheet.set_column("B:B", 12)
sheet.set_column("C:C", 12)
sheet.set_column("D:D", 20)
sheet.set_column("E:E", 40)
sheet.set_column("F:F", 12)
sheet.set_column("G:G", 12)
row = 0
if only_unlinked:
unique_unlinked = collections.defaultdict(set)
hash_dict = {}
for ds in db:
for exc in (e for e in ds.get("exchanges", []) if not e.get("input")):
ah = activity_hash(exc)
unique_unlinked[exc.get("type")].add(ah)
hash_dict[ah] = exc
for key in sorted(unique_unlinked.keys()):
sheet.write_string(row, 0, key, bold)
write_headers(sheet, row + 1)
row += 2
exchanges = [hash_dict[ah] for ah in unique_unlinked[key]]
exchanges.sort(key=lambda x: (x["name"], list(x.get("categories", []))))
for exc in exchanges:
write_row(sheet, row, exc)
row += 1
row += 1
else:
for ds in db:
if not ds.get("exchanges"):
continue
write_row(sheet, row, ds, False)
if only_activity_names:
row += 1
continue
write_headers(sheet, row + 1)
row += 2
for exc in sorted(ds.get("exchanges", []), key=lambda x: x.get("name")):
write_row(sheet, row, exc)
row += 1
row += 1
workbook.close()
return filepath
[docs]
def write_lcia_matching(db, name):
"""
Write matched and unmatched CFs to Excel file
Parameters
----------
db : :class:`bw2data.Database`
Database to write.
name : str
Name of the database to write.
Returns
-------
str
Filepath of the exported file.
"""
def write_headers(sheet, row):
columns = ("Name", "Amount", "Unit", "Categories", "Matched")
for index, col in enumerate(columns):
sheet.write_string(row, index, col, bold)
def write_row(sheet, row, data):
sheet.write_string(row, 0, data.get("name") or "(unknown)")
sheet.write_number(row, 1, data.get("amount", -1))
sheet.write_string(row, 2, data.get("unit") or "(unknown)")
sheet.write_string(row, 3, ":".join(data.get("categories") or ["(unknown)"]))
sheet.write_boolean(row, 4, "input" in data)
safe_name = safe_filename(name, False)
filepath = os.path.join(projects.output_dir, "lcia-matching-" + safe_name + ".xlsx")
workbook = xlsxwriter.Workbook(filepath)
bold = workbook.add_format({"bold": True})
bold.set_font_size(12)
sheet = workbook.add_worksheet("matching")
sheet.set_column("A:A", 60)
sheet.set_column("B:B", 12)
sheet.set_column("C:C", 12)
sheet.set_column("D:D", 40)
row = 0
for ds in db:
for index, elem in enumerate(ds["name"]):
sheet.write_string(row, index, elem, bold)
write_headers(sheet, row + 1)
row += 2
for cf in sorted(ds.get("exchanges", []), key=lambda x: x.get("name")):
write_row(sheet, row, cf)
row += 1
row += 1
workbook.close()
return filepath