Source code for bw_simapro_csv.cas

from numbers import Number
from typing import Optional

import numpy as np
from loguru import logger


def calculate_check_digit(cas: str) -> int:
    """Compute the check digit for the body of a CAS number.

    Args:
        cas: The digits of a CAS number *without* hyphens and *without* the
            trailing check digit, e.g. ``"773218"`` for ``7732-18-5``.

    Returns:
        The weighted digit sum modulo 10. Digits are weighted 1, 2, 3, ...
        starting from the rightmost one; only the last nine digits are used.

    Raises:
        ValueError: If one of the characters being summed is not a digit.
    """
    weighted_total = 0
    # zip() stops at the shorter input, so at most nine digits take part.
    for weight, digit in zip(range(1, 10), reversed(cas)):
        weighted_total += weight * int(digit)
    return weighted_total % 10
def validate_cas_string(cas: Optional[str]) -> Optional[str]:
    """Validate a CAS number and return it in hyphenated form.

    Accepted input shapes:

    * digits only (``"7732185"``) - hyphens are inserted before the last
      three and the last one digit after the check digit is verified;
    * hyphenated with an empty last group (``"1228284-64-"``) - the missing
      check digit is computed and appended;
    * fully hyphenated (``"7732-18-5"``) - the check digit is verified.

    Leading zeros are stripped from verified numbers. Every rejection or
    repair is reported through ``logger.warning``.

    Args:
        cas: Candidate CAS number. Surrounding whitespace is ignored.
            ``None``, empty strings and NaN are treated as "no value".

    Returns:
        The normalized CAS string, or ``None`` when there is no value, the
        check digit is wrong, or the number of hyphens is not zero or two.

    Raises:
        ValueError: If a character that should be a digit is not one.
    """
    if isinstance(cas, str):
        cas = cas.strip()
    # Covers None and "" (and any other falsy value) before string operations.
    if not cas:
        return None
    if isinstance(cas, Number) and np.isnan(cas):
        return None

    if "-" not in cas:
        first, second, given_digit = cas[:-3], cas[-3:-1], int(cas[-1])
        expected_digit = calculate_check_digit(first + second)
        if str(expected_digit) != str(given_digit):
            # Report the digit we computed, not the (wrong) one we were given.
            logger.warning(
                "Removing invalid CAS number {}; last digit should be {}".format(
                    cas, expected_digit
                )
            )
            return None
        return "-".join([first, second, str(given_digit)]).lstrip("0")

    if cas.count("-") != 2:
        logger.warning(
            "Given CAS can't be validated, wrong number of hyphens are present: {}".format(cas)
        )
        return None

    first, second, third = cas.split("-")
    if not third:
        # e.g. 1228284-64-
        check_digit = str(calculate_check_digit(first + second))
        logger.warning("Adding missing CAS check digit, {} -> {}".format(cas, cas + check_digit))
        return cas + check_digit

    expected_digit = calculate_check_digit(first + second)
    if str(expected_digit) != third:
        logger.warning(
            "Removing invalid CAS number {}; last digit should be {}".format(cas, expected_digit)
        )
        return None
    return cas.lstrip("0")