Source code for bw_simapro_csv.header
from datetime import datetime
from enum import Enum
from typing import List, Optional
from dateutil import parser
from loguru import logger
from pydantic import BaseModel
from .utils import asboolean, nobraces, noquotes
# Sometimes they use text labels instead of characters
[docs]
# Header labels that carry a yes/no flag, mapped to the key under which
# `parse_header` stores the value after passing it through `asboolean`.
# Labels are compared case-sensitively as line prefixes, so each one keeps
# its trailing colon.
BOOLEAN_LABELS = {
    "Convert expressions to constants:": "convert_expressions",
    "Exclude library processes:": "exclude_library_processes",
    "Export platform IDs:": "export_platform_ids",
    "Include sub product stages and processes:": "include_stages",
    "Related objects (system descriptions, substances, units, etc.):": "related_objects",
    "Skip empty fields:": "skip_empty_fields",
    "Skip unused parameters:": "skip_unused_parameters",
}
[docs]
# Header labels whose value is kept as text, mapped to the key under which
# `parse_header` stores it. All keys are lowercase because they are compared
# against the lowercased header line.
STRING_LABELS = {
    # Fixed (English) labels
    "csv format version:": "csv_version",
    "date separator:": "date_separator",
    "decimal separator:": "decimal_separator",
    "open project:": "open_project",
    "selection:": "selection",
    # These labels can be translated; who knows why these and not others!?
    # Note that the translated variants are listed without a trailing colon.
    "project": "project",
    "projet": "project",
    "heijastaa": "project",
    "tionscadal": "project",
    "proyecto": "project",
    "projeto": "project",
    "progetto": "project",
    "projekt": "project",
    "open library": "open_library",
    # This doesn't feel correct but it's in the files
    "ouvrir bibliothèque": "open_library",
    # I made this one up
    "ouvrir projet": "open_project",
}
[docs]
# Translated export-type names (lowercase) mapped to their English
# equivalents; `parse_header` looks up the lowercased header line here.
# All these are guesses, we don't have enough examples
TYPE_TRANSLATIONS = {
    # French
    "étapes du produit": "product stages",
    "méthodes": "methods",
    "processus": "processes",
    # Dutch
    "productfasen": "product stages",
    "methoden": "methods",
    "processen": "processes",
    # Italian
    "fasi del prodotto": "product stages",
    "metodi": "methods",
    "processi": "processes",
    # German ("methoden" is spelled the same as the Dutch entry above, so it
    # is listed only once to avoid a duplicate dictionary key)
    "produktphasen": "product stages",
    "prozesse": "processes",
}
[docs]
def parse_header(data: List[str]) -> (SimaProCSVHeader, int):
    """
    Read the header section and parse its values. A typical header looks like:

        {SimaPro 8.2.0.0}
        {processes}
        {Date: 10/12/2016}
        {Time: 10:54:47 PM}

    Sometimes these lines can be quoted:

        "{Related objects (system descriptions, substances, units, etc.): Yes}"

    The generic pattern is:

        SimaPro version
        File export type
        Key: value dictionary
        Optional library list

    We parse this into a header dictionary, doing type conversion when necessary.

    Parameters
    ----------
    data
        Lines of the export, starting with the header. Parsing stops at the
        first line that does not start with ``{`` or ``"{``.

    Returns
    -------
    A ``(header, index)`` tuple where ``header`` is the ``SimaProCSVHeader``
    built from the parsed values and ``index`` is the position of the first
    line that is not part of the header. ``index`` equals ``len(data)`` when
    every line is a header line, and ``0`` for empty input.
    """
    parsed = {"libraries": []}
    date = time = dtformat = ""

    for index, line in enumerate(data):
        if not line.startswith(('"{', "{")):
            break

        line = nobraces(line)
        lowered = line.lower()
        kind = TYPE_TRANSLATIONS.get(lowered, lowered)

        if line.startswith("SimaPro") and ":" not in line:
            # Drop the leading "SimaPro " (8 characters) to keep the version
            parsed["simapro_version"] = line[8:]
        elif kind in iter(SimaProCSVType):
            parsed["kind"] = kind
        elif line.startswith("CSV separator:"):
            char = line[len("CSV separator:") :].strip()
            parsed["delimiter"] = DELIMITER_MAP.get(char.lower(), char)
        elif line.startswith("Date:"):
            date = line[len("Date:") :].strip()
        elif line.startswith("Short date format:"):
            dtformat = line[len("Short date format:") :].strip()
        elif line.startswith("Time:"):
            time = line[len("Time:") :].strip()
        elif (label := _longest_label(line, BOOLEAN_LABELS)) is not None:
            parsed[BOOLEAN_LABELS[label]] = asboolean(line[len(label) :].strip())
        elif (label := _longest_label(lowered, STRING_LABELS)) is not None:
            value = line[len(label) :].strip()
            # The translated labels in STRING_LABELS are stored without their
            # trailing colon; don't let that separator leak into the value.
            if not label.endswith(":") and value.startswith(":"):
                value = value[1:].strip()
            parsed[STRING_LABELS[label]] = noquotes(value)
        elif line.startswith("Library '"):
            parsed["libraries"].append(noquotes(line[len("Library") :].strip()))
        else:
            logger.warning(f"Can't understand header line (skipping):\n\t{line}")
    else:
        # No `break`: the input was empty or consisted only of header lines,
        # so the first non-header position is one past the end.
        index = len(data)

    if "kind" not in parsed:
        logger.warning(
            """
Export is missing type (processes, methods, or product stages).
Using default value of 'processes'
"""
        )
        parsed["kind"] = "processes"

    # Only switch to month-first parsing when the header explicitly tells us
    # that the month token precedes the day token in the short date format.
    month_first = bool(
        date
        and time
        and dtformat
        # Can be 'MM' or 'M'
        and "M" in dtformat
        and "d" in dtformat
        and dtformat.index("M") < dtformat.index("d")
    )
    dayfirst = not month_first
    parsed["dayfirst"] = dayfirst

    try:
        parsed["created"] = parser.parse(f"{date} {time}", dayfirst=dayfirst)
    except (parser.ParserError, OverflowError):
        # Missing or unparseable date/time: fall back to the current time
        parsed["created"] = datetime.now()

    return SimaProCSVHeader(**parsed), index


def _longest_label(text: str, labels) -> Optional[str]:
    """Return the longest key in ``labels`` that ``text`` starts with, or ``None``.

    Preferring the longest match keeps a short label from shadowing a longer
    one that shares its prefix (e.g. ``"projet"`` and ``"projeto"``).
    """
    return max((label for label in labels if text.startswith(label)), key=len, default=None)