Source code for bw2io.export.excel

import collections
import numbers
import os
from pathlib import Path
from typing import List, Optional

import xlsxwriter
from bw2data import Database, projects
from bw_processing import safe_filename

from ..utils import activity_hash
from .csv import CSVFormatter


[docs] def create_valid_worksheet_name(string): """ Exclude invalid characters and names. Parameters ---------- string : str String to convert to a valid worksheet name. Returns ------- string : str Valid worksheet name. Notes ----- Data from http://www.accountingweb.com/technology/excel/seven-characters-you-cant-use-in-worksheet-names. """ excluded = {"\\", "/", "*", "[", "]", ":", "?"} if string == "History": return "History-worksheet" for x in excluded: string = string.replace(x, "#") return string[:30]
[docs] def lci_matrices_to_excel(database_name, include_descendants=True): """ Export LCI matrices to Excel. Parameters ---------- database_name : str Name of database to export. include_descendants : bool Include databases which are linked from ``database_name``. (default True) Returns ------- filepath : str Path to created Excel file. Examples -------- >>> lci_matrices_to_excel(database_name='example_db', include_descendants=True) '/path/to/example_db.xlsx' """ from bw2calc import LCA print("Starting Excel export. This can be slow for large matrices!") safe_name = safe_filename(database_name, False) filepath = os.path.join(projects.output_dir, safe_name + ".xlsx") lca = LCA({Database(database_name).random(): 1}) lca.load_lci_data() lca.fix_dictionaries() if not include_descendants: lca.activity_dict = { key: value for key, value in lca.activity_dict.items() if key[0] == database_name } # Drop biosphere flows with zero references # TODO: This will ignore (-1 + 1 = 0) references lca.biosphere_dict = { key: value for key, value in lca.biosphere_dict.items() if lca.biosphere_matrix[lca.biosphere_dict[key], :].sum() != 0 } workbook = xlsxwriter.Workbook(filepath) bold = workbook.add_format({"bold": True}) print("Sorting objects") sorted_activity_keys = sorted( [(Database.get(key).get("name") or "Unknown", key) for key in lca.activity_dict] ) sorted_product_keys = sorted( [(Database.get(key).get("name") or "Unknown", key) for key in lca.product_dict] ) sorted_bio_keys = sorted( [ (Database.get(key).get("name") or "Unknown", key) for key in lca.biosphere_dict ] ) tm_sheet = workbook.add_worksheet("technosphere") tm_sheet.set_column("A:A", 50) data = Database(database_name).load() # Labels for index, data in enumerate(sorted_activity_keys): tm_sheet.write_string(0, index + 1, data[0]) for index, data in enumerate(sorted_product_keys): tm_sheet.write_string(index + 1, 0, data[0]) print("Entering technosphere matrix data") coo = lca.technosphere_matrix.tocoo() # Translate row index to sorted product index act_dict = {obj[1]: idx for idx, obj in enumerate(sorted_activity_keys)} pro_dict = {obj[1]: idx for idx, obj in enumerate(sorted_product_keys)} bio_dict = {obj[1]: idx for idx, obj in enumerate(sorted_bio_keys)} pro_lookup = {v: pro_dict[k] for k, v in lca.product_dict.items()} bio_lookup = {v: bio_dict[k] for k, v in lca.biosphere_dict.items()} act_lookup = {v: act_dict[k] for k, v in lca.activity_dict.items()} # Matrix values for row, col, value in zip(coo.row, coo.col, coo.data): tm_sheet.write_number(pro_lookup[row] + 1, act_lookup[col] + 1, value) bm_sheet = workbook.add_worksheet("biosphere") bm_sheet.set_column("A:A", 50) data = Database(database_name).load() # Labels for index, data in enumerate(sorted_activity_keys): bm_sheet.write_string(0, index + 1, data[0]) for index, data in enumerate(sorted_bio_keys): bm_sheet.write_string(index + 1, 0, data[0]) print("Entering biosphere matrix data") coo = lca.biosphere_matrix.tocoo() # Matrix values for row, col, value in zip(coo.row, coo.col, coo.data): bm_sheet.write_number(bio_lookup[row] + 1, act_lookup[col] + 1, value) COLUMNS = ( "Index", "Name", "Reference product", "Unit", "Categories", "Location", ) tech_sheet = workbook.add_worksheet("technosphere-labels") tech_sheet.set_column("B:B", 60) tech_sheet.set_column("C:C", 30) tech_sheet.set_column("D:D", 15) tech_sheet.set_column("E:E", 30) print("Writing metadata") # Header for index, col in enumerate(COLUMNS): tech_sheet.write_string(0, index, col, bold) tech_sheet.write_comment( "C1", "Only for ecoinvent 3, where names =/= products.", ) for index, data in enumerate(sorted_activity_keys): obj = Database.get(data[1]) tech_sheet.write_number(index + 1, 0, index + 1) tech_sheet.write_string(index + 1, 1, obj.get("name") or "Unknown") tech_sheet.write_string(index + 1, 2, obj.get("reference product") or "") tech_sheet.write_string(index + 1, 3, obj.get("unit") or "Unknown") tech_sheet.write_string(index + 1, 4, " - ".join(obj.get("categories") or [])) tech_sheet.write_string(index + 1, 5, obj.get("location") or "Unknown") COLUMNS = ( "Index", "Name", "Unit", "Categories", ) bio_sheet = workbook.add_worksheet("biosphere-labels") bio_sheet.set_column("B:B", 60) bio_sheet.set_column("C:C", 15) bio_sheet.set_column("D:D", 30) # Header for index, col in enumerate(COLUMNS): bio_sheet.write_string(0, index, col, bold) for index, data in enumerate(sorted_bio_keys): obj = Database.get(data[1]) bio_sheet.write_number(index + 1, 0, index + 1) bio_sheet.write_string(index + 1, 1, obj.get("name") or "Unknown") bio_sheet.write_string(index + 1, 2, obj.get("unit") or "Unknown") bio_sheet.write_string(index + 1, 3, " - ".join(obj.get("categories") or [])) workbook.close() return filepath
[docs] def write_lci_excel(database_name, objs=None, sections=None, dirpath=None): """ Export database `database_name` to an Excel spreadsheet. Notes ----- Not all data can be exported. The following constraints apply: * Nested data, e.g. `{'foo': {'bar': 'baz'}}` are excluded. * Spreadsheets are not a great format for nested data. However, *tuples* are exported, and the characters `::` are used to join elements of the tuple. * The only well-supported data types are strings, numbers, and booleans. Default directory is ``projects.output_dir``, set ``dirpath`` to have save the file somewhere else. Parameters ---------- database_name : str Name of the database to export. objs : list, optional List of objects to export. If not provided, all objects in the database will be exported. sections : list, optional List of sections to export. If not provided, all sections will be exported. dirpath : str, optional Directory to save the file to. Default is ``projects.output_dir``. Returns ------- str Filepath of the exported file. """ safe_name = safe_filename(database_name, False) if dirpath is None: dirpath = projects.output_dir if not os.path.isdir(dirpath) or not os.access(dirpath, os.W_OK): raise ValueError(f"Directory path {dirpath} is not a writable directory") filepath = os.path.join(dirpath, "lci-" + safe_name + ".xlsx") workbook = xlsxwriter.Workbook(filepath) bold = workbook.add_format({"bold": True}) bold.set_font_size(12) highlighted = { "Activity", "Database", "Exchanges", "Parameters", "Database parameters", "Project parameters", } frmt = lambda x: bold if row[0] in highlighted else None sheet = workbook.add_worksheet(create_valid_worksheet_name(database_name)) data = CSVFormatter(database_name, objs).get_formatted_data(sections) for row_index, row in enumerate(data): for col_index, value in enumerate(row): if value is None: continue elif isinstance(value, numbers.Number): sheet.write_number(row_index, col_index, value, frmt(value)) else: try: sheet.write_string(row_index, col_index, value, frmt(value)) except TypeError: pass workbook.close() return filepath
[docs] def write_lci_matching( db: List[dict], database_name: str, only_unlinked: bool = False, only_activity_names: bool = False, output_dir: Optional[Path] = None, ): """ Write matched and unmatched exchanges to Excel file Parameters ---------- db : :class:`bw2data.Database` Database to write. database_name : str Name of the database to write. only_unlinked : bool, optional Only write unlinked exchanges. Default is ``False``. only_activity_names : bool, optional Only write activity names. Default is ``False``. Returns ------- str Filepath of the exported file. """ def write_headers(sheet, row): columns = ( "Name", "Reference Product", "Amount", "Database", "Unit", "Categories", "Location", "Type", "Matched", ) for index, col in enumerate(columns): sheet.write_string(row, index, col, bold) def write_row(sheet, row, data, exc=True): style = highlighted if ("input" not in data and exc) else None if exc: sheet.write_string(row, 0, data.get("name") or "(unknown)", style) sheet.write_string( row, 1, data.get("reference product") or "(unknown)", style ) try: sheet.write_number(row, 2, float(data.get("amount")), style) except ValueError: sheet.write_string(row, 2, "Unknown", style) else: sheet.write_string(row, 0, data.get("name") or "(unknown)", bold) sheet.write_string(row, 3, (data.get("input") or [""])[0], style) sheet.write_string(row, 4, data.get("unit") or "(unknown)", style) sheet.write_string( row, 5, ":".join(data.get("categories") or ["(unknown)"]), style ) sheet.write_string(row, 6, data.get("location") or "(unknown)", style) if exc: sheet.write_string(row, 7, data.get("type") or "(unknown)", style) sheet.write_boolean(row, 8, "input" in data, style) if only_unlinked and only_activity_names: raise ValueError( "Must choose only one of ``only_unlinked`` and ``only_activity_names``" ) safe_name = safe_filename(database_name, False) suffix = "-unlinked" if only_unlinked else ("-names" if only_activity_names else "") filepath = ( Path(output_dir or projects.output_dir) / f"db-matching-{safe_name}{suffix}.xlsx" ) workbook = xlsxwriter.Workbook(filepath) bold = workbook.add_format({"bold": True}) highlighted = workbook.add_format({"bg_color": "#FFB5B5"}) bold.set_font_size(12) sheet = workbook.add_worksheet("matching") sheet.set_column("A:A", 60) sheet.set_column("B:B", 12) sheet.set_column("C:C", 12) sheet.set_column("D:D", 20) sheet.set_column("E:E", 40) sheet.set_column("F:F", 12) sheet.set_column("G:G", 12) row = 0 if only_unlinked: unique_unlinked = collections.defaultdict(set) hash_dict = {} for ds in db: for exc in (e for e in ds.get("exchanges", []) if not e.get("input")): ah = activity_hash(exc) unique_unlinked[exc.get("type")].add(ah) hash_dict[ah] = exc for key in sorted(unique_unlinked.keys()): sheet.write_string(row, 0, key, bold) write_headers(sheet, row + 1) row += 2 exchanges = [hash_dict[ah] for ah in unique_unlinked[key]] exchanges.sort(key=lambda x: (x["name"], list(x.get("categories", [])))) for exc in exchanges: write_row(sheet, row, exc) row += 1 row += 1 else: for ds in db: if not ds.get("exchanges"): continue write_row(sheet, row, ds, False) if only_activity_names: row += 1 continue write_headers(sheet, row + 1) row += 2 for exc in sorted(ds.get("exchanges", []), key=lambda x: x.get("name")): write_row(sheet, row, exc) row += 1 row += 1 workbook.close() return filepath
[docs] def write_lcia_matching(db, name): """ Write matched and unmatched CFs to Excel file Parameters ---------- db : :class:`bw2data.Database` Database to write. name : str Name of the database to write. Returns ------- str Filepath of the exported file. """ def write_headers(sheet, row): columns = ("Name", "Amount", "Unit", "Categories", "Matched") for index, col in enumerate(columns): sheet.write_string(row, index, col, bold) def write_row(sheet, row, data): sheet.write_string(row, 0, data.get("name") or "(unknown)") sheet.write_number(row, 1, data.get("amount", -1)) sheet.write_string(row, 2, data.get("unit") or "(unknown)") sheet.write_string(row, 3, ":".join(data.get("categories") or ["(unknown)"])) sheet.write_boolean(row, 4, "input" in data) safe_name = safe_filename(name, False) filepath = os.path.join(projects.output_dir, "lcia-matching-" + safe_name + ".xlsx") workbook = xlsxwriter.Workbook(filepath) bold = workbook.add_format({"bold": True}) bold.set_font_size(12) sheet = workbook.add_worksheet("matching") sheet.set_column("A:A", 60) sheet.set_column("B:B", 12) sheet.set_column("C:C", 12) sheet.set_column("D:D", 40) row = 0 for ds in db: for index, elem in enumerate(ds["name"]): sheet.write_string(row, index, elem, bold) write_headers(sheet, row + 1) row += 2 for cf in sorted(ds.get("exchanges", []), key=lambda x: x.get("name")): write_row(sheet, row, cf) row += 1 row += 1 workbook.close() return filepath