Source code for dataio.utils.path_manager

from datetime import datetime
from pathlib import Path
from typing import Optional

from dataio.utils.accounts import AccountRepository
from dataio.utils.versions import VersionCollection


[docs] class BasePaths: def __init__(self, root_path: str | Path): if not isinstance(root_path, Path): root_path = Path(root_path) self.root_path = root_path self.bonsai_root = self.root_path / "_bonsai" if not self.bonsai_root.exists(): print("bonsai_root is ", self.bonsai_root) self.bonsai_root = self.root_path if not self.bonsai_root.exists(): raise FileNotFoundError( f"Root path <{self.bonsai_root}> is not set correctly. Please reinstantiate" ) @property def data_collection(self) -> Path: return self.bonsai_root / "collect" @property def data_clean(self) -> Path: return self.bonsai_root / "clean" @property def exiobase4(self) -> Path: from dataio.default import DB_NAME, DB_VERSION return self.bonsai_root / "_b Next version" / "Exiobase_4" # return self.bonsai_root / "_b Next version" / f"{DB_NAME}_{DB_VERSION}" @property def merged_collected_data(self) -> Path: return self.data_collection / "Merged_data" @property def correspondence(self) -> Path: return self.bonsai_root / "Classification and other data/corrspondence" @property def classification(self): return self.bonsai_root / "Classification and other data/classifications_exiob"
[docs] class PathBuilder: """ PathBuilder provides shorthand paths for files on a cloud storage specified by `root_path` to be used by the algorithms to create the full exiobase database. You can provide a version_source so that you can link to files of previous versions. Please resort to `default_version` for the structure of the version file. for entries that are not specified in the dictionary, the path to the latest available version is generated. Additionally, it is possible to provide the path to an alternative version file that is used instead of the standard one. """ def __init__( self, root_path: str | Path, version_source: Path | str | None = None, account_repository: AccountRepository = None, ): self.base_paths = BasePaths(root_path) self.bonsai_home = self.base_paths.root_path self.exiobase4 = self.base_paths.exiobase4 self.correspondence = self.base_paths.correspondence self.classification = self.base_paths.classification self.data_collection = self.base_paths.data_collection self.data_clean = self.base_paths.data_clean self.version_source = version_source self.versions: VersionCollection = self.load_version_dates( source=self.version_source ) self.account_repo = account_repository
[docs] def load_version_dates( self, source: str | Path | None = None, ) -> VersionCollection: return VersionCollection.load_from(source)
[docs] def compose( self, path: Path, account_name: Optional[str] = None, version_date: Optional[str] = "latest", ) -> Path: """Ensures a path exists by creating it if necessary. This method constructs a path by appending optional account and version date components. If the account name is provided, it retrieves the account, adding it if it doesn't exist. The version date can be specified or default to the latest version. The final path is created if it does not already exist. Parameters ---------- path : Path The base path to compose. account_name : Optional[str], optional The account name to include in the path. If the account does not exist, it will be added. Defaults to None. version_date : Optional[str], optional The version date to include in the path. It can be "latest" to use the latest version date, a specific date in "YYYY-MM-DD" format, or None to exclude the version date. Defaults to "latest". Returns ------- Path The composed path, created if it didn't already exist. Raises ------ KeyError: If the account name is not found in the account repository. ValueError: If the provided version date is invalid. """ path_components = [path] if account_name: try: account = self.account_repo.get_account(name=account_name) except KeyError: print(f"{account_name} not found in accounts") account = self.account_repo.add_account(account_name) path_components.append(account.name) if version_date == "latest": version = self.versions.get_latest_version(account.name) parsed_version_date = datetime.strptime( version.version_date, "%Y-%m-%d" ).strftime("%Y%m%d") elif version_date and version_date != "latest": try: self.versions.update_account_version_date( account.name, version_date ) except ValueError: self.versions.add_account_version( account_name=account.name, version_date=version_date ) parsed_version_date = datetime.strptime( version_date, "%Y-%m-%d" ).strftime("%Y%m%d") elif version_date is None: parsed_version_date = "" path_components.append(parsed_version_date) elif not account_name: if version_date == "latest": parsed_version_date = datetime.strptime( version_date, "%Y-%m-%d" ).strftime("%Y%m%d") path_components.append(parsed_version_date) elif version_date and version_date != "latest": parsed_version_date = datetime.strptime( version_date, "%Y-%m-%d" ).strftime("%Y%m%d") elif version_date is None: parsed_version_date = "" path_components.append(parsed_version_date) final_path = Path(*path_components) final_path.mkdir( parents=True, exist_ok=True ) # Create the path if it doesn't exist # TODO: append the final path to PathRepository return final_path
def _construct_version(self, account: str) -> str: version = self.versions.get_latest_version(account) return datetime.strptime(version.version_date, "%Y-%m-%d").strftime("%Y%m%d") @property def supply_raw(self): date_supply = self._construct_version("supply") folder_supply = "supply_" + date_supply return self.compose( self.base_paths.exiobase4 / "Supply" / "Raw" / folder_supply, version_date=None, ) @property def supply_intermediate(self): date_supply_interm = self._construct_version("supply intermediate") folder_supply_final = "supply_" + date_supply_interm return self.compose( self.base_paths.exiobase4 / "Supply" / "Intermediate" / folder_supply_final, version_date=None, ) @property def balance_raw(self): date_supply_interm = self._construct_version("supply intermediate") return self.compose( self.base_paths.exiobase4 / "Balanced" / date_supply_interm, version_date=None, ) @property def use_raw(self): date_use = self._construct_version("use") folder_use_raw = "use_" + date_use return self.compose( self.base_paths.exiobase4 / "Use" / "Raw" / folder_use_raw, version_date=None, ) @property def use_intermediate(self): date_use_interm = self._construct_version("use intermediate") folder_use_final = "use_" + date_use_interm return self.compose( self.base_paths.exiobase4 / "Use" / "Intermediate" / folder_use_final, version_date=None, ) @property def hiot_raw(self): date_hiot = self._construct_version("hiot") folder_hiot_raw = "Raw_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "Raw" / folder_hiot_raw, version_date=None, ) @property def hiot_interm(self): date_hiot = self._construct_version("hiot") return self.compose( self.base_paths.exiobase4 / "HIOT" / "Intermediate" / date_hiot, version_date=None, ) @property def prod_markets(self): date_prod_markets = self._construct_version("market of products") folder_prod_markets = "product_markets_" + date_prod_markets return self.compose( self.base_paths.exiobase4 / "Markets" / "Raw" / folder_prod_markets, version_date=None, ) @property def trade_raw(self): date_trade_merged = self._construct_version("trade_merged") folder_trade_raw = "raw_data_" + date_trade_merged return self.compose( self.base_paths.exiobase4 / "Trade" / "Raw" / folder_trade_raw, version_date=None, ) @property def trade_intermediate(self): date_trade_int = self._construct_version("trade intermediate") folder_trade_int = "trade_" + date_trade_int return self.compose( self.base_paths.exiobase4 / "Trade" / "Intermediate" / folder_trade_int, version_date=None, ) @property def emissions_intermediate(self): date_emissions = self._construct_version("emissions") folder_emissions = "emissions_" + date_emissions return self.compose( self.base_paths.exiobase4 / "Emissions" / "Intermediate" / folder_emissions, version_date=None, ) @property def emissions_raw(self): date_emissions = self._construct_version("emissions") folder_emissions = "emissions_" + date_emissions return self.compose( self.base_paths.exiobase4 / "Emissions" / "Raw" / folder_emissions, version_date=None, ) @property def emissions_coeff(self): return self.correspondence.parent / "Emissions_coeffs" @property def fao_collection(self): return self.base_paths.data_collection / "Faostat" @property def fao_processed(self): return self.data_collection / "Faostat" / "processed" @property def fao_store(self): return self.base_paths.data_collection / "Faostat" / "store" @property def ferts_collection(self): return self.base_paths.data_collection / "Fertiliser data" @property def iLUC_param(self): return self.base_paths.data_collection / "iLUC" @property def dm_coeff(self): return self.base_paths.data_collection / "Dry_matter" @property def iLUC_raw(self): date_iluc = self._construct_version("iluc") folder_iLUC = "Input_data_" + date_iluc return self.compose( self.base_paths.exiobase4 / "iLUC" / "Raw" / folder_iLUC, version_date=None, ) @property def iLUC_interm(self): date_iluc = self._construct_version("iluc") folder_iLUC = "Derived_data_" + date_iluc return self.compose( self.base_paths.exiobase4 / "iLUC" / "Intermediate" / folder_iLUC, version_date=None, ) @property def land_use(self): date_land = self._construct_version("land use") folder_land_use = "Raw_" + date_land return self.compose( self.base_paths.exiobase4 / "Land" / "Raw" / folder_land_use, version_date=None, ) @property def fertilisers(self): date_ferts = self._construct_version("fertilisers_prod") folder_fers = "Raw_" + date_ferts return self.compose( self.base_paths.exiobase4 / "Fertilisers" / "Raw" / folder_fers, version_date=None, ) @property def fert_interm(self): date_ferts = self._construct_version("fertilisers_prod") folder_fers = "Raw_" + date_ferts return self.compose( self.base_paths.exiobase4 / "Fertilisers" / "Intermediate" / folder_fers, version_date=None, ) @property def property_param(self): date_property = self._construct_version("property matrix") folder_proper = "Raw_" + date_property return self.compose( self.base_paths.exiobase4 / "Properties" / folder_proper, version_date=None ) @property def ipcc_param(self): return self.base_paths.data_collection / "IPCC" @property def gams_inputs(self): return self.base_paths.data_collection / "GAMS" / "data_source_for_GAMS" @property def corresp_fao(self): return self.correspondence / "fao" @property def hiot_with_iluc(self): date_hiot = self._construct_version("hiot") folder_iluc_out = "source_data_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "HIOT_with_iluc" / folder_iluc_out, version_date=None, ) @property def hiot_with_marg_elect(self): date_hiot = self._construct_version("hiot") folder_elec_out = "source_data_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "HIOT_with_marg_electricity" / folder_elec_out, version_date=None, ) @property def hiot_with_capital(self): date_hiot = self._construct_version("hiot") folder_capital_out = "source_data_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "b2_version" / folder_capital_out, version_date=None, ) @property def value_added(self): date_hiot = self._construct_version("hiot") folder_value_added = "source_data_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "value_added" / folder_value_added, version_date=None, ) @property def matrix_of_invest(self): date_hiot = self._construct_version("hiot") folder_capital_mat = "source_data_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "b2_version" / "matrix_investments" / folder_capital_mat, version_date=None, ) @property def iea_clean(self): return self.data_clean / "IEA" @property def iea_raw_exio(self): date_iea = self._construct_version("IEA") folder_iea = "raw_data_" + date_iea return self.compose( self.base_paths.exiobase4 / "IEA" / "Raw" / folder_iea, version_date=None ) @property def iea_interm(self): date_iea = self._construct_version("IEA") folder_iea_interm = "iea_data_" + date_iea return self.compose( self.base_paths.exiobase4 / "IEA" / "Intermediate" / folder_iea_interm, version_date=None, ) @property def lci_raw(self): return self.base_paths.data_collection / "LCI" @property def lci_cleaned(self): return self.compose(self.data_clean, account_name="LCI", version_date=None) @property def lci_vehicles(self): return ( self.base_paths.data_collection / "Product_statistics" / "Vehicles" / "ready_for_exiobase" ) @property def lci_products(self): return self.base_paths.data_collection / "Product_statistics" @property def lci_concito(self): return self.base_paths.data_collection / "Concito_DB" @property def lci_exio4(self): date_use = self._construct_version("use") folder_lci = "Raw_" + date_use return self.compose( self.base_paths.exiobase4 / "Product_statistics" / folder_lci, version_date=None, ) @property def heat_markets(self): date_heatmarket = self._construct_version("heatmarket") folder_heatmarket = "heat_markets_" + date_heatmarket return self.compose( self.base_paths.exiobase4 / "Markets" / "Raw" / folder_heatmarket, version_date=None, ) @property def fish_markets(self): date_fish = self._construct_version("fish prod mix") folder_fishmarket = "fish_prod_mix_" + date_fish return self.compose( self.base_paths.exiobase4 / "Markets" / "Raw" / folder_fishmarket, version_date=None, ) @property def waste_accounts(self): date_waste = self._construct_version("waste supply") folder_waste = "Raw_" + date_waste return self.compose( self.base_paths.exiobase4 / "Waste" / "Raw" / folder_waste, version_date=None, ) @property def waste_markets(self): date_wastemarkets = self._construct_version("waste markets") folder_waste_markets = "waste_markets_" + date_wastemarkets return self.compose( self.base_paths.exiobase4 / "Markets" / "Raw" / folder_waste_markets, version_date=None, ) @property def monetary_tables(self): return self.base_paths.data_collection / "MRSUT_2016" @property def property_values(self): date_property = self._construct_version("property matrix") folder_property = "Raw_" + date_property return self.compose( self.base_paths.exiobase4 / "Properties" / folder_property, version_date=None, ) @property def prices(self): return self.compose( self.base_paths.exiobase4, account_name="prices", version_date="latest", ) @property def cleaned_exio_3(self): return self.compose( self.base_paths.data_clean / "Exiobase_v3", version_date=None ) @property def un_data(self): return self.compose( self.base_paths.data_collection / "UN_commodities", version_date=None ) @property def un_data_elab(self): date_un = self._construct_version("UNcd") folder_un = "intermediate_" + date_un return self.compose( self.base_paths.exiobase4 / "UN_data" / folder_un, version_date=None ) @property def b2_version(self): date_hiot = self._construct_version("hiot") folder_b2_version = "data_" + date_hiot return self.compose( self.base_paths.exiobase4 / "HIOT" / "b2_simapro_version" / folder_b2_version, version_date=None, ) @property def outlier(self): date_hiot = self._construct_version("hiot") folder_outlier = "version_" + date_hiot return self.compose( self.base_paths.exiobase4 / "analyses" / "outlier_analysis" / folder_outlier ) @property def simapro(self) -> Path: return self.compose(self.base_paths.exiobase4 / "SimaPro") @property def cement(self): # TODO: Why cement have the same version as hiot? date_hiot = self._construct_version("hiot") folder_cement = "version_" + date_hiot return self.compose( self.base_paths.exiobase4 / "parameterized_functions" / "cement" / folder_cement ) @property def cement_data(self): return self.compose( self.base_paths.exiobase4 / "parameterized_functions" / "cement_data" ) @property def natural_resource(self) -> Path: date_raw = self._construct_version("natural_resource") folder_res = "raw_data_" + date_raw return self.compose( self.base_paths.exiobase4 / "Resource_extraction" / folder_res, version_date=None, ) @property def cleaned_fao(self) -> Path: return self.data_clean / "Faostat" / "new format" @property def cleaned_forestry(self): date_forestry = self._construct_version("forestry") folder_fores = "cleaned_data_" + date_forestry return self.compose( self.base_paths.exiobase4 / "Faostat" / "forestry" / folder_fores, version_date=None, ) @property def lci_fish(self): date_lci_fish = self._construct_version("lci_fish") folder_lci_fish = "raw_data_" + date_lci_fish return self.compose( self.base_paths.exiobase4 / "LCI" / "aquaculture" / folder_lci_fish, version_date=None, ) @property def fao_raw(self): date_lci_fish = self._construct_version("lci_fish") folder_fao = "raw_data_" + date_lci_fish return self.compose( self.base_paths.exiobase4 / "Faostat" / folder_fao, version_date=None ) @property def lci_country(self): return self.exiobase4 / "LCI" / "country_activ_specific" @property def lci_act_generic(self): return self.exiobase4 / "LCI" / "default_act_specific" @property def lci_prod_generic(self): return self.exiobase4 / "LCI" / "default_prod_specific" @property def trade_route(self): date_trade_route = self._construct_version("trade_route") folder_trade_route = "trade_route_" + date_trade_route return self.compose( self.base_paths.exiobase4 / "trade_route" / "raw" / folder_trade_route, version_date=None, )
[docs] def list_path_attributes(self): import inspect paths = [] for name, _ in inspect.getmembers( self.__class__, lambda v: isinstance(v, property) ): paths.append(name) return paths