Source code for bw_simapro_csv.header

from datetime import datetime
from enum import Enum
from typing import List, Optional, Tuple

from dateutil import parser
from loguru import logger
from pydantic import BaseModel

from .utils import asboolean, nobraces, noquotes

# Sometimes the "CSV separator:" header line gives a text label instead of the
# separator character itself; this maps the (lowercased) label to the character.
DELIMITER_MAP = {"semicolon": ";", "tab": "\t", "comma": ","}



# Header labels whose value `parse_header` converts with `asboolean`, mapped to
# the key under which the converted value is stored. Matching is a
# case-sensitive prefix test against the header line.
BOOLEAN_LABELS = {
    "Convert expressions to constants:": "convert_expressions",
    "Exclude library processes:": "exclude_library_processes",
    "Export platform IDs:": "export_platform_ids",
    "Include sub product stages and processes:": "include_stages",
    "Related objects (system descriptions, substances, units, etc.):": "related_objects",
    "Skip empty fields:": "skip_empty_fields",
    "Skip unused parameters:": "skip_unused_parameters",
}


# Header labels whose value is kept as a string, mapped to the key under which
# the value is stored. `parse_header` compares these against the *lowercased*
# header line with a prefix test, so every key here must be lowercase.
#
# Order matters when one key is a prefix of another: a lookup that stops at the
# first matching key would otherwise pick the shorter key and leave the
# remainder of the longer label in the value. Keep the longer key first
# ("projeto" before "projet").
STRING_LABELS = {
    "csv format version:": "csv_version",
    "date separator:": "date_separator",
    "decimal separator:": "decimal_separator",
    "open project:": "open_project",
    "selection:": "selection",
    # These labels can be translated; who knows why these and not others!?
    "project": "project",
    "projeto": "project",  # must precede "projet", which is a prefix of it
    "projet": "project",
    "heijastaa": "project",
    "tionscadal": "project",
    "proyecto": "project",
    "progetto": "project",
    "projekt": "project",
    "open library": "open_library",
    "ouvrir bibliothèque": "open_library",  # This doesn't feel correct but it's in the files
    "ouvrir projet": "open_project",  # I made this one up
}




# Translated export-type labels mapped to the English values used by
# `SimaProCSVType`. Keys are lowercase because the lookup is done on the
# lowercased header line.
TYPE_TRANSLATIONS = {
    # All these are guesses, we don't have enough examples
    # French
    "étapes du produit": "product stages",
    "méthodes": "methods",
    "processus": "processes",
    # Dutch
    "productfasen": "product stages",
    "methoden": "methods",  # German uses the same word, so one entry covers both
    "processen": "processes",
    # Italian
    "fasi del prodotto": "product stages",
    "metodi": "methods",
    "processi": "processes",
    # German
    "produktphasen": "product stages",
    "prozesse": "processes",
}




class SimaProCSVType(str, Enum):
    """Export types a SimaPro CSV header can declare.

    The ``str`` mixin makes each member compare equal to its plain string
    value, so a bare string such as ``"processes"`` can be tested for
    membership against the members.
    """

    stages = "product stages"
    methods = "methods"
    processes = "processes"





class SimaProCSVHeader(BaseModel):
    """Typed container for the values read from a SimaPro CSV header.

    Fields without a default (``simapro_version``, ``kind``, ``delimiter``,
    ``csv_version``) are required; everything else is optional.
    """

    # Text following "SimaPro " on the version header line
    simapro_version: str
    # Export type: product stages, methods, or processes
    kind: SimaProCSVType
    # CSV separator character (text labels are resolved via DELIMITER_MAP)
    delimiter: str
    project: Optional[str] = None
    # Value of the "CSV Format version:" header line
    csv_version: str
    # One entry per "Library '...'" header line
    libraries: List[str] = []
    selection: Optional[str] = None
    open_project: Optional[str] = None
    open_library: Optional[str] = None
    date_separator: Optional[str] = "/"
    # Whether the day precedes the month in dates
    dayfirst: Optional[bool] = False
    export_platform_ids: Optional[bool] = None
    skip_empty_fields: Optional[bool] = None
    convert_expressions: Optional[bool] = None
    related_objects: Optional[bool] = None
    include_stages: Optional[bool] = None
    decimal_separator: Optional[str] = "."
    # Export timestamp built from the "Date:" and "Time:" header lines
    created: Optional[datetime] = None
    exclude_library_processes: Optional[bool] = None
    # Target of the "Skip unused parameters:" entry in BOOLEAN_LABELS
    skip_unused_parameters: Optional[bool] = None





def _longest_prefix(text: str, labels) -> Optional[str]:
    """Return the longest key in *labels* that *text* starts with, else ``None``."""
    return max((key for key in labels if text.startswith(key)), key=len, default=None)


def parse_header(data: List[str]) -> Tuple[SimaProCSVHeader, int]:
    """
    Read the header section and parse its values. A typical header looks like:

        {SimaPro 8.2.0.0}
        {processes}
        {Date: 10/12/2016}
        {Time: 10:54:47 PM}

    Sometimes these lines can be quoted:

        "{Related objects (system descriptions, substances, units, etc.): Yes}"

    The generic pattern is:

        SimaPro version
        File export type
        Key: value dictionary
        Optional library list

    We parse this into a header dictionary, doing type conversion when necessary.

    Args:
        data: Lines of the file, header first. Header lines start with ``{``
            or ``"{``; reading stops at the first line that does not.

    Returns:
        A tuple ``(header, index)``. ``index`` is the position in ``data`` of
        the first non-header line, or of the last line when every line is a
        header line.

    Notes:
        Lines that match no known label are logged as warnings and skipped.
        A missing export type defaults to ``"processes"``. If the creation
        date can't be parsed, the current local time is used instead.
        Building ``SimaProCSVHeader`` is expected to fail validation when a
        required value (SimaPro version, CSV separator, CSV format version)
        never appeared in the header.
    """
    parsed = {"libraries": []}

    date = time = dtformat = ""

    for index, line in enumerate(data):
        if not line.startswith(('"{', "{")):
            break

        line = nobraces(line)
        lowered = line.lower()
        kind = TYPE_TRANSLATIONS.get(lowered, lowered)

        if line.startswith("SimaPro") and ":" not in line:
            # Drop the leading "SimaPro " (8 characters) to keep only the version
            parsed["simapro_version"] = line[8:]
        elif kind in iter(SimaProCSVType):
            parsed["kind"] = kind
        elif line.startswith("CSV separator:"):
            char = line[len("CSV separator:") :].strip()
            parsed["delimiter"] = DELIMITER_MAP.get(char.lower(), char)
        elif line.startswith("Date:"):
            date = line[len("Date:") :].strip()
        elif line.startswith("Short date format:"):
            dtformat = line[len("Short date format:") :].strip()
        elif line.startswith("Time:"):
            time = line[len("Time:") :].strip()
        elif (label := _longest_prefix(line, BOOLEAN_LABELS)) is not None:
            parsed[BOOLEAN_LABELS[label]] = asboolean(line[len(label) :].strip())
        # String labels are stored lowercase, so match on the lowercased line.
        # Taking the longest match keeps "projeto" from being read as "projet".
        elif (label := _longest_prefix(lowered, STRING_LABELS)) is not None:
            parsed[STRING_LABELS[label]] = noquotes(line[len(label) :].strip())
        elif line.startswith("Library '"):
            parsed["libraries"].append(noquotes(line[len("Library") :].strip()))
        else:
            logger.warning(f"Can't understand header line (skipping):\n\t{line}")

    if "kind" not in parsed:
        logger.warning(
            """
    Export is missing type (processes, methods, or product stages).
    Using default value of 'processes'
        """
        )
        parsed["kind"] = "processes"

    # Day-first is the default; only a short date format that puts the month
    # ('MM' or 'M') before the day switches it off. This depends solely on the
    # declared format, not on whether a Date/Time line was present, because the
    # flag is stored on the header independently of the creation timestamp.
    month_first = (
        "M" in dtformat and "d" in dtformat and dtformat.index("M") < dtformat.index("d")
    )
    dayfirst = not month_first
    parsed["dayfirst"] = dayfirst

    try:
        parsed["created"] = parser.parse(f"{date} {time}", dayfirst=dayfirst)
    except parser.ParserError:
        # No usable date/time in the header; fall back to the current time
        parsed["created"] = datetime.now()

    return SimaProCSVHeader(**parsed), index