Source code for bw2io.backup
from pathlib import Path
from typing import Optional
import codecs
import datetime
import json
import os
import shutil
import tarfile
import tempfile
from bw2data import projects
from bw2data.utils import safe_filename
[docs]
def backup_data_directory():
    """
    Archive ``projects.dir`` as a timestamped ``.tar.gz`` file in the user's home directory.

    The archive is named ``brightway2-data-backup.<timestamp>.tar.gz`` and holds a
    single top-level folder named after the last path component of ``projects.dir``.
    Nothing is returned, and restoring from the archive is a manual step.

    Notes
    -----
    NOTE(review): elsewhere in this module ``projects.dir`` is used as the directory
    of the currently active project, so this appears to archive only that project
    rather than every project - confirm against ``bw2data``.

    Examples
    --------
    >>> bw2io.bw2setup()
    >>> bw2io.backup.backup_data_directory()
    Creating backup archive - this could take a few minutes...
    """
    # Day-MonthName-Year-Hour(12h)-MinuteAM/PM, e.g. "05-March-2024-09-41AM".
    stamp = datetime.datetime.now().strftime("%d-%B-%Y-%I-%M%p")
    archive_name = "brightway2-data-backup.{}.tar.gz".format(stamp)
    archive_path = os.path.join(os.path.expanduser("~"), archive_name)

    print("Creating backup archive - this could take a few minutes...")
    with tarfile.open(archive_path, "w:gz") as archive:
        # Store the data under its folder name only, not its absolute path.
        archive.add(projects.dir, arcname=os.path.basename(projects.dir))
[docs]
def backup_project_directory(project: str):
    """
    Archive one project's data directory as a ``.tar.gz`` file in the user's home directory.

    Before archiving, a ``.project-name.json`` file holding ``{"name": project}`` is
    written into the project directory, so that the original project name travels
    with the archive.

    Parameters
    ----------
    project : str
        Name of the project to back up.

    Returns
    -------
    project_name : str
        Name of the project that was backed up (the ``project`` argument, unchanged).

    Raises
    ------
    ValueError
        If ``project`` is not a known project.

    See Also
    --------
    bw2io.backup.restore_project_directory: To restore a project directory from a backup.
    """
    if project not in projects:
        raise ValueError("Project {} does not exist".format(project))

    # Day-MonthName-Year-Hour(12h)-MinuteAM/PM, e.g. "05-March-2024-09-41AM".
    stamp = datetime.datetime.now().strftime("%d-%B-%Y-%I-%M%p")
    archive_name = "brightway2-project-{}-backup.{}.tar.gz".format(project, stamp)
    archive_path = os.path.join(os.path.expanduser("~"), archive_name)

    source_dir = os.path.join(projects._base_data_dir, safe_filename(project))

    # Record the human-readable project name inside the directory being archived.
    marker_path = os.path.join(source_dir, ".project-name.json")
    with open(marker_path, "w") as marker:
        json.dump({"name": project}, marker)

    print("Creating project backup archive - this could take a few minutes...")
    with tarfile.open(archive_path, "w:gz") as archive:
        archive.add(source_dir, arcname=safe_filename(project))

    return project
[docs]
def _read_project_name(fp):
    """Return the ``"name"`` value stored in the archive's ``project-name.json`` file."""
    reader = codecs.getreader("utf-8")
    # Walk the members in stream order rather than looking one up by name; see
    # https://stackoverflow.com/questions/68997850/python-readlines-with-tar-file-gives-streamerror-seeking-backwards-is-not-al/68998071#68998071
    with tarfile.open(fp, "r:gz") as tar:
        for member in tar:
            # ``extractfile`` returns None for non-file members, so skip those.
            if member.isfile() and member.name.endswith("project-name.json"):
                return json.load(reader(tar.extractfile(member)))["name"]
    raise ValueError("Couldn't find project name file in archive")


def _is_within_directory(directory, target):
    """Return True if ``target`` is ``directory`` itself or located somewhere below it."""
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    try:
        # ``commonpath`` compares whole path components. The character-based
        # ``commonprefix`` would wrongly accept a sibling such as "/tmp/abcd"
        # as being inside "/tmp/abc".
        return os.path.commonpath([abs_directory, abs_target]) == abs_directory
    except ValueError:
        # Raised when the paths cannot be compared (e.g. different Windows drives).
        return False


def _safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract ``tar`` into ``path`` after rejecting members that resolve outside of it."""
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not _is_within_directory(path, member_path):
            raise ValueError("Attempted Path Traversal in Tar File")

    extract_kwargs = {"numeric_owner": numeric_owner}
    # The name check above does not look at link targets. Where the running
    # Python provides extraction filters, also opt in to the stdlib "data" filter.
    if hasattr(tarfile, "data_filter"):
        extract_kwargs["filter"] = "data"
    tar.extractall(path, members, **extract_kwargs)


def restore_project_directory(fp: str, project_name: Optional[str] = None, overwrite_existing: Optional[bool] = False):
    """
    Restore a backed up project data directory from a ``.tar.gz`` (compressed tar archive).

    The archive is unpacked into a temporary directory, which must then contain
    exactly one directory. The contents of that directory are copied into the
    directory of the restored project. The project that was current before the
    call is made current again afterwards, even if copying fails.

    Parameters
    ----------
    fp : str
        File path of the project archive to restore.
    project_name : str, optional
        Name of the project to restore into. If omitted, the name is read from the
        ``project-name.json`` file stored inside the archive.
    overwrite_existing : bool, optional
        If ``False`` (the default), refuse to restore into a project that already
        exists. If ``True``, the archive contents are copied over the existing
        project directory; files that are not in the archive are left in place.

    Returns
    -------
    project_name : str
        Name of the project that was restored.

    Raises
    ------
    AssertionError
        If ``fp`` is not an existing file.
    ValueError
        If the project already exists and ``overwrite_existing`` is false, if no
        project name file is found in the archive (when ``project_name`` is not
        given), if an archive member would be extracted outside the extraction
        directory, or if the archive does not contain exactly one directory.

    See Also
    --------
    bw2io.backup.backup_project_directory: To back up a project directory.
    """
    if not os.path.isfile(fp):
        # Raised explicitly so the check also runs under ``python -O``; the
        # exception type is kept for callers that already handle AssertionError.
        raise AssertionError("Can't find file at path: {}".format(fp))

    print("Restoring project backup archive - this could take a few minutes...")
    if project_name is None:
        project_name = _read_project_name(fp)

    if project_name in projects and not overwrite_existing:
        raise ValueError("Project {} already exists".format(project_name))

    with tempfile.TemporaryDirectory() as td:
        with tarfile.open(fp, "r:gz") as tar:
            _safe_extract(tar, td)

        # Find single extracted directory; don't know it ahead of time
        extracted_dirs = [entry for entry in Path(td).iterdir() if entry.is_dir()]
        if len(extracted_dirs) != 1:
            raise ValueError("Can't find single directory extracted from project archive")
        extracted_path = extracted_dirs[0]

        _current = projects.current
        try:
            # Switch projects so that ``projects.dir`` is the copy destination.
            projects.set_current(project_name, update=False)
            shutil.copytree(extracted_path, projects.dir, dirs_exist_ok=True)
        finally:
            # Always hand the previously current project back to the caller.
            projects.set_current(_current)

    return project_name